Publications of David B. Dunson

%% Books   
@book{fds338546,
   Author = {Gelman, A and Carlin, JB and Stern, HS and Dunson, DB and Vehtari, A and Rubin, DB},
   Title = {Bayesian data analysis, third edition},
   Pages = {1-646},
   Year = {2013},
   Month = {January},
   ISBN = {9781439840955},
   Abstract = {© 2013 by Taylor & Francis Group, LLC. Broadening its scope
             to nonstatisticians, Bayesian Methods for Data Analysis,
             Third Edition provides an accessible introduction to the
             foundations and applications of Bayesian analysis. Along
             with a complete reorganization of the material, this edition
             concentrates more on hierarchical Bayesian modeling as
             implemented via Markov chain Monte Carlo (MCMC) methods and
             related data analytic techniques. New to the Third Edition
             • New data examples, corresponding R and WinBUGS code, and
             homework problems • Explicit descriptions and
              illustrations of hierarchical modeling, now commonplace in
             Bayesian data analysis • A new chapter on Bayesian design
             that emphasizes Bayesian clinical trials • A completely
             revised and expanded section on ranking and histogram
             estimation • A new case study on infectious disease
             modeling and the 1918 flu epidemic • A solutions manual
             for qualifying instructors that contains solutions, computer
             code, and associated output for every homework
              problem, available both electronically and in print. Ideal
              for anyone performing statistical analyses. Focusing on
             applications from biostatistics, epidemiology, and medicine,
             this text builds on the popularity of its predecessors by
             making it suitable for even more practitioners and
             students.},
   Key = {fds338546}
}


%% Papers Published   
@article{fds258005,
   Author = {Dunson, WA and Paradise, CJ and Dunson, DB},
   Title = {Inhibitory effect of low salinity on growth and reproduction
             of the estuarine sheepshead minnow, Cyprinodon
             variegatus},
   Journal = {Copeia},
   Volume = {1998},
   Number = {1},
   Pages = {235-239},
   Publisher = {JSTOR},
   Year = {1998},
   Month = {February},
   url = {http://dx.doi.org/10.2307/1447727},
   Doi = {10.2307/1447727},
   Key = {fds258005}
}

@article{fds258006,
   Author = {Dunson, DB},
   Title = {Dose-dependent number of implants and implications in
             developmental toxicity.},
   Journal = {Biometrics},
   Volume = {54},
   Number = {2},
   Pages = {558-569},
   Year = {1998},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.2307/3109763},
   Abstract = {This paper proposes a method for assessing risk in
             developmental toxicity studies with exposure prior to
             implantation. The method proposed in this paper was
             developed to account for a dose-dependent trend in the
             number of implantation sites per dam, which is a common
             problem in studies with exposure prior to implantation.
             Toxins may have the effect of interfering with the early
             reproductive process, which can prevent implantation in the
             uterine wall. An imputation procedure is presented for
             estimating the number of potential fetuses by sampling from
             the empirical distribution of the number of implants per
             litter in the control group. The marginal death outcomes and
             the joint malformation and survival outcomes for each
             potential fetus can be estimated using multiple imputation
             or the chained data augmentation algorithm. Logit models can
             then be fit and used to estimate the effect of dose on
             reducing the probability of a normal birth. These models
             accommodate multiple covariate effects and can be applied to
             low-dose extrapolation. A simulation study is done to
             evaluate the properties of model-based estimators of the
             mean response and the virtually safe dose level (VSD). It
             was found that both estimates were good approximations of
             the underlying dose effect. A dominant lethal assay data set
             (Luning et al., 1966, Mutation Research 3, 444-451) is
             analyzed, and the results are compared with those of Rai and
             Van Ryzin.},
   Doi = {10.2307/3109763},
   Key = {fds258006}
}
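
A minimal sketch of the imputation-plus-logit idea described in this abstract, assuming invented litter data and a deliberately simplified imputation rule (sampling potential implant counts from the empirical control distribution); it is not the paper's procedure.

    # Sketch: impute "potential" implants from the control-group empirical
    # distribution, then fit a logit dose-response model. All data and the
    # max-based imputation rule are hypothetical simplifications.
    import numpy as np
    import statsmodels.api as sm

    rng = np.random.default_rng(0)

    dose = np.repeat([0.0, 0.5, 1.0], 20)                 # three dose groups
    control_implants = rng.poisson(12, 20) + 1            # stand-in controls
    implants = np.concatenate([control_implants,
                               rng.poisson(10, 20) + 1,
                               rng.poisson(8, 20) + 1])
    p_normal = 1 / (1 + np.exp(-(2.0 - 1.5 * dose)))      # invented dose effect
    normal = rng.binomial(implants, p_normal)

    # Impute each litter's potential implant count by sampling from the
    # empirical control distribution; implants lost to dose are treated as
    # adverse outcomes.
    potential = np.maximum(implants,
                           rng.choice(control_implants, size=implants.size))

    # Logit model for Pr(normal birth) among potential fetuses.
    y = np.column_stack([normal, potential - normal])     # successes, failures
    X = sm.add_constant(dose)
    fit = sm.GLM(y, X, family=sm.families.Binomial()).fit()
    print(fit.params)                                     # logit-scale estimates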

@article{fds258008,
   Author = {Dunson, DB and Weinberg, CR and Perreault, SD and Chapin,
             RE},
   Title = {Summarizing the motion of self-propelled cells: applications
             to sperm motility.},
   Journal = {Biometrics},
   Volume = {55},
   Number = {2},
   Pages = {537-543},
   Year = {1999},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.1999.00537.x},
   Abstract = {Proper characterization of the motion of spermatozoa is an
             important prerequisite for interpreting differences in sperm
             motility that might arise from exposure to toxicants.
             Patterns of sperm movement can be extremely complex. On the
             basis of an exponential model that relates the discretely
             approximated curvilinear velocity to the tracking rate, we
             develop a statistic that indexes the predictability of the
             path for individual sperm. We summarize the path of each
             sperm using this and two other statistics: (1) the path
             displacement velocity and (2) linearity of movement. We
             apply the method to a set of rat sperm tracks representative
             of both normal and abnormal motion characteristics.},
   Doi = {10.1111/j.0006-341x.1999.00537.x},
   Key = {fds258008}
}
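
The path summaries named here have standard discrete approximations; the sketch below computes the curvilinear velocity, the straight-line (path displacement) velocity, and their ratio (linearity) for a simulated track. The track data and frame rate are invented, and the exponential-model predictability statistic from the paper is not implemented.

    # Standard track summaries for one sperm path, from (x, y) positions
    # sampled at a fixed frame rate (hypothetical data).
    import numpy as np

    def track_summaries(xy, frames_per_sec=30.0):
        """xy: (n, 2) array of positions for one track."""
        steps = np.diff(xy, axis=0)                           # per-frame moves
        duration = (len(xy) - 1) / frames_per_sec
        vcl = np.linalg.norm(steps, axis=1).sum() / duration  # curvilinear
        vsl = np.linalg.norm(xy[-1] - xy[0]) / duration       # straight-line
        lin = vsl / vcl if vcl > 0 else 0.0                   # linearity
        return vcl, vsl, lin

    rng = np.random.default_rng(1)
    track = np.cumsum(rng.normal([1.0, 0.2], 0.5, size=(60, 2)), axis=0)
    print(track_summaries(track))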

@article{fds258007,
   Author = {Dunson, DB and Baird, DD and Wilcox, AJ and Weinberg,
             CR},
   Title = {Day-specific probabilities of clinical pregnancy based on
             two studies with imperfect measures of ovulation.},
   Journal = {Human Reproduction},
   Volume = {14},
   Number = {7},
   Pages = {1835-1839},
   Year = {1999},
   Month = {July},
   ISSN = {0268-1161},
   url = {http://dx.doi.org/10.1093/humrep/14.7.1835},
   Abstract = {Two studies have related the timing of sexual intercourse
             (relative to ovulation) to day-specific fecundability. The
             first was a study of Catholic couples practising natural
             family planning in London in the 1950s and 1960s and the
             second was of North Carolina couples attempting to become
             pregnant in the early 1980s. The former identified ovulation
             based on the ovulatory shift in the basal body temperature,
             while the latter used urinary assays of hormones. We use a
             statistical model to correct for error in identifying
             ovulation and to re-estimate the length of the fertile
             window and day-specific fecundabilities. We estimate the
             same 6-day fertile interval in both studies after
             controlling for error. After adjusting for error both data
             sets showed the highest estimate of the probability of
             pregnancy on the day prior to ovulation and both fell close
             to zero after ovulation. Given that the fertile interval is
             before ovulation, methods that anticipate ovulation by
             several days (such as the assessment of cervical mucus)
             would be particularly useful for couples who want to time
             their intercourse either to avoid or facilitate
             conception.},
   Doi = {10.1093/humrep/14.7.1835},
   Key = {fds258007}
}
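
The day-specific fecundability framework common in this literature is written Pr(pregnancy) = 1 - prod_j (1 - p_j)^{x_j}, where x_j indicates intercourse on day j of the fertile window. A minimal maximum-likelihood sketch under that model, with simulated cycles and no measurement-error correction (the correction is the paper's contribution and is omitted here):

    # Fit day-specific conception probabilities p_j by maximum likelihood
    # under the standard model; data are simulated, not from the studies.
    import numpy as np
    from scipy.optimize import minimize

    rng = np.random.default_rng(2)
    n_cycles, n_days = 400, 6
    X = rng.binomial(1, 0.4, size=(n_cycles, n_days))   # intercourse indicators
    true_p = np.array([0.05, 0.10, 0.15, 0.25, 0.30, 0.10])
    preg = rng.binomial(1, 1 - np.prod((1 - true_p) ** X, axis=1))

    def neg_loglik(logit_p):
        p = 1 / (1 + np.exp(-logit_p))
        pr = np.clip(1 - np.prod((1 - p) ** X, axis=1), 1e-10, 1 - 1e-10)
        return -(preg * np.log(pr) + (1 - preg) * np.log(1 - pr)).sum()

    fit = minimize(neg_loglik, np.zeros(n_days), method="BFGS")
    print(1 / (1 + np.exp(-fit.x)))    # estimated day-specific probabilities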

@article{fds258009,
   Author = {Dunson, WA and Dunson, DB},
   Title = {Factors influencing growth and survival of the killifish,
             Rivulus marmoratus, held inside enclosures in mangrove
             swamps},
   Journal = {Copeia},
   Volume = {1999},
   Number = {3},
   Pages = {661-668},
   Publisher = {JSTOR},
   Year = {1999},
   Month = {August},
   url = {http://dx.doi.org/10.2307/1447598},
   Abstract = {We measured growth and survival in field enclosures of
             juvenile Rivulus marmoratus under a variety of biotic
             (effects of body mass and intraspecific density) and abiotic
             conditions (seasonal climatic changes, site-specific
              hypoxia). We also tested three enclosure types:
              surface-floating buckets (0.021 m3) and tubes (0.006 m3)
             positioned at the surface or on the bottom. Growth rate was
             inversely correlated with wet body mass (between 6 and 42
              mg) and density (1-16 fish/0.021 m3 enclosure). However,
             density did not affect survival. Growth was significantly
             lower in tubes placed on the bottom than at the surface.
             There were considerable differences in growth and survival
              among sites, likely due to differences in the occurrence
              and persistence of hypoxic events. At the Catfish Creek
              location (a pool surrounded by black mangroves), the bottom
              was routinely hypoxic. At a shallow bay site, hypoxia was
              episodic: bottom O2 at dawn fell below 2 mg/l on nine of 48
              days, and below 1 mg/l on two of 48 days.
             Maximum growth rates (3.5-4%/day) were recorded in February
             to May, in comparison with lower values in December to
             January. However, low growth rates also occurred in the
             spring, probably caused by episodic hypoxia.},
   Doi = {10.2307/1447598},
   Key = {fds258009}
}

@article{fds258010,
   Author = {Dunson, DB and Haseman, JK},
   Title = {Modeling tumor onset and multiplicity using transition
             models with latent variables.},
   Journal = {Biometrics},
   Volume = {55},
   Number = {3},
   Pages = {965-970},
   Year = {1999},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.1999.00965.x},
   Abstract = {We describe a method for modeling carcinogenicity from
             animal studies where the data consist of counts of the
             number of tumors present over time. The research is
             motivated by applications to transgenic rodent studies,
             which have emerged as an alternative to chronic bioassays
             for screening possible carcinogens. In transgenic mouse
             studies, the endpoint of interest is frequently skin
             papilloma, with weekly examinations determining how many
             papillomas each animal has at a particular point in time. It
             is assumed that each animal has two unobservable latent
             variables at each time point. The first indicates whether or
             not the tumors are in a multiplying state and the second is
             the potential number of additional tumors if the tumors are
             in a multiplying state. The product of these variables
             follows a zero-inflated Poisson distribution, and the EM
             algorithm can be used to maximize the observed-data
             pseudo-likelihood, based on the latent variables. A
             generalized estimating equations robust variance estimator
             adjusts for dependency among outcomes within individual
             animals. The method is applied to testing for a dose-related
             trend in both tumor incidence and multiplicity in
             carcinogenicity studies.},
   Doi = {10.1111/j.0006-341x.1999.00965.x},
   Key = {fds258010}
}
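
The zero-inflated Poisson building block described here has a simple likelihood: a count is zero with extra probability 1 - pi (tumors not in a multiplying state) and is otherwise Poisson. A self-contained maximum-likelihood sketch with simulated counts (the paper's latent-variable EM and GEE variance adjustment are not reproduced):

    # Zero-inflated Poisson fit by direct likelihood maximization.
    import numpy as np
    from scipy.optimize import minimize
    from scipy.stats import poisson

    rng = np.random.default_rng(3)
    pi_true, lam_true = 0.6, 2.5                  # invented parameters
    state = rng.binomial(1, pi_true, 500)         # latent multiplying state
    counts = state * rng.poisson(lam_true, 500)

    def neg_loglik(theta):
        pi, lam = 1 / (1 + np.exp(-theta[0])), np.exp(theta[1])
        ll = np.where(counts == 0,
                      np.log((1 - pi) + pi * poisson.pmf(0, lam)),
                      np.log(pi) + poisson.logpmf(counts, lam))
        return -ll.sum()

    fit = minimize(neg_loglik, [0.0, 0.0], method="BFGS")
    print(1 / (1 + np.exp(-fit.x[0])), np.exp(fit.x[1]))   # pi-hat, lambda-hat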

@article{fds257885,
   Author = {Dunson, DB and Weinberg, CR},
   Title = {Accounting for unreported and missing intercourse in human
             fertility studies},
   Journal = {Statistics in Medicine},
   Volume = {19},
   Number = {5},
   Pages = {665-679},
   Year = {2000},
   ISSN = {0277-6715},
   url = {http://dx.doi.org/10.1002/(SICI)1097-0258(20000315)19:5<665::AID-SIM391>3.0.CO},
   Abstract = {In prospective studies of human fertility that attempt to
             identify days of ovulation, couples record each day whether
             they had intercourse. Depending on the design of the study,
             couples either (I) mark the dates of intercourse on a chart
             or (II) mark 'yes' or 'no' for each day of the menstrual
             cycle. If protocol I is used, intercourse dates that couples
             fail to record are indistinguishable from dates of no
             intercourse. Consequently, estimates of day-specific
             fecundability are biased upwards. If protocol II is used,
             data from menstrual cycles with missing intercourse
             information must be discarded in order to fit current
             fertility models. We propose methods to account for
             unreported and missing intercourse under the assumption that
             the missingness mechanism is independent of time conditional
             on the unobservable true intercourse status. We use probit
             mixture models to allow for heterogeneity among couples,
             both in fecundability and in the missingness and
             non-reporting mechanisms. Markov chain Monte Carlo (MCMC)
             techniques are used for Bayesian estimation. The methods are
             generally applicable to the analysis of aggregated Bernoulli
             outcomes when there is uncertainty in whether a given trial,
             out of a series of trials, was completed. We illustrate the
             methods by application to two prospective fertility
             studies.},
   Doi = {10.1002/(SICI)1097-0258(20000315)19:5<665::AID-SIM391>3.0.CO},
   Key = {fds257885}
}

@article{fds258013,
   Author = {Dunson, DB},
   Title = {Models for papilloma multiplicity and regression:
             Applications to transgenic mouse studies},
   Journal = {Journal of the Royal Statistical Society: Series C
              (Applied Statistics)},
   Volume = {49},
   Number = {1},
   Pages = {19-30},
   Publisher = {WILEY},
   Year = {2000},
   Month = {January},
   url = {http://dx.doi.org/10.1111/1467-9876.00176},
   Abstract = {In cancer studies that use transgenic or knockout mice, skin
             tumour counts are recorded over time to measure
             tumorigenicity. In these studies cancer biologists are
             interested in the effect of endogenous and/or exogenous
             factors on papilloma onset, multiplicity and regression. In
             this paper an analysis of data from a study conducted by the
             National Institute of Environmental Health Sciences on the
             effect of genetic factors on skin tumorigenesis is
             presented. Papilloma multiplicity and regression are
             modelled by using Bernoulli, Poisson and binomial latent
             variables, each of which can depend on covariates and
             previous outcomes. An EM algorithm is proposed for parameter
             estimation, and generalized estimating equations adjust for
             extra dependence between outcomes within individual animals.
             A Cox proportional hazards model is used to describe
             covariate effects on the onset of tumours.},
   Doi = {10.1111/1467-9876.00176},
   Key = {fds258013}
}

@article{fds258014,
   Author = {Dunson, DB},
   Title = {Bayesian latent variable models for clustered mixed
             outcomes},
   Journal = {Journal of the Royal Statistical Society: Series B
             (Statistical Methodology)},
   Volume = {62},
   Number = {2},
   Pages = {355-366},
   Publisher = {WILEY},
   Year = {2000},
   Month = {January},
   url = {http://dx.doi.org/10.1111/1467-9868.00236},
   Abstract = {A general framework is proposed for modelling clustered
             mixed outcomes. A mixture of generalized linear models is
             used to describe the joint distribution of a set of
             underlying variables, and an arbitrary function relates the
             underlying variables to the observed outcomes. The model
             accommodates multilevel data structures, general covariate
             effects and distinct link functions and error distributions
             for each underlying variable. Within the framework proposed,
             novel models are developed for clustered multiple binary,
             unordered categorical and joint discrete and continuous
             outcomes. A Markov chain Monte Carlo sampling algorithm is
             described for estimating the posterior distributions of the
             parameters and latent variables. Because of the flexibility
             of the modelling framework and estimation procedure,
             extensions to ordered categorical outcomes and more complex
             data structures are straightforward. The methods are
             illustrated by using data from a reproductive toxicity
             study.},
   Doi = {10.1111/1467-9868.00236},
   Key = {fds258014}
}
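
A flavor of the latent-variable MCMC used for models in this class can be given with the simplest special case, a probit regression updated by Albert-Chib data augmentation; everything below (data, flat prior, single binary outcome) is a simplification for illustration, not the paper's sampler.

    # Gibbs sampler for probit regression via truncated-normal augmentation.
    import numpy as np
    from scipy.stats import truncnorm

    rng = np.random.default_rng(4)
    n = 300
    X = np.column_stack([np.ones(n), rng.normal(size=n)])
    beta_true = np.array([-0.3, 0.8])
    y = (X @ beta_true + rng.normal(size=n) > 0).astype(int)

    beta = np.zeros(2)
    XtX_inv = np.linalg.inv(X.T @ X)     # flat prior on beta, for simplicity
    draws = []
    for it in range(2000):
        # 1. Latent z_i ~ N(x_i'beta, 1), truncated by the sign of y_i.
        mu = X @ beta
        lo = np.where(y == 1, -mu, -np.inf)   # standardized bounds
        hi = np.where(y == 1, np.inf, -mu)
        z = mu + truncnorm.rvs(lo, hi, size=n, random_state=rng)
        # 2. beta | z is normal, mean (X'X)^{-1} X'z, covariance (X'X)^{-1}.
        beta = rng.multivariate_normal(XtX_inv @ X.T @ z, XtX_inv)
        if it >= 500:
            draws.append(beta)
    print(np.mean(draws, axis=0))        # posterior mean, near beta_true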

@article{fds257883,
   Author = {Dunson, DB and Weinberg, CR},
   Title = {Modeling human fertility in the presence of measurement
             error.},
   Journal = {Biometrics},
   Volume = {56},
   Number = {1},
   Pages = {288-292},
   Year = {2000},
   Month = {March},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2000.00288.x},
   Abstract = {The probability of conception in a given menstrual cycle is
             closely related to the timing of intercourse relative to
             ovulation. Although commonly used markers of time of
             ovulation are known to be error prone, most fertility models
             assume the day of ovulation is measured without error. We
             develop a mixture model that allows the day to be
             misspecified. We assume that the measurement errors are
             i.i.d. across menstrual cycles. Heterogeneity among couples
             in the per cycle likelihood of conception is accounted for
             using a beta mixture model. Bayesian estimation is
             straightforward using Markov chain Monte Carlo techniques.
             The methods are applied to a prospective study of couples at
             risk of pregnancy. In the absence of validation data or
             multiple independent markers of ovulation, the
             identifiability of the measurement error distribution
             depends on the assumed model. Thus, the results of studies
             relating the timing of intercourse to the probability of
             conception should be interpreted cautiously.},
   Doi = {10.1111/j.0006-341x.2000.00288.x},
   Key = {fds257883}
}
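
The beta mixture for heterogeneity mentioned here implies the familiar beta-geometric selection effect: highly fecund couples conceive early and leave the risk set, so the apparent per-cycle conception rate declines over cycles. A small simulation checking this against the closed-form hazard a/(a + b + c - 1), with invented parameter values:

    # Beta-geometric selection effect under couple-level heterogeneity.
    import numpy as np

    rng = np.random.default_rng(5)
    a, b = 2.0, 5.0
    p = rng.beta(a, b, 200000)           # couple-specific fecundability
    cycles = rng.geometric(p)            # cycle of first conception

    for c in (1, 2, 3, 6):
        at_risk = cycles >= c
        print(c, round((cycles[at_risk] == c).mean(), 3),   # empirical hazard
              round(a / (a + b + c - 1), 3))                # closed form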

@article{fds257884,
   Author = {Weinberg, CR and Dunson, DB},
   Title = {Some Issues in Assessing Human Fertility},
   Journal = {Journal of the American Statistical Association},
   Volume = {95},
   Number = {449},
   Pages = {300-303},
   Booktitle = {Statistics in the 21st Century},
   Publisher = {Informa UK Limited},
   Year = {2000},
   Month = {March},
   ISBN = {9781420035391},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2000.10473928},
   Abstract = {© 2002 by American Statistical Association. One of the
             pleasures of working as an applied statistician is the
             awareness it brings of the wide diversity of scientific
             fields to which our profession contributes critical concepts
             and methods. My own awareness was enhanced by accepting the
             invitation from the editors of JASA to serve as guest editor
             for this section of vignettes celebrating the significant
             contributions made by statisticians to the life and medical
             sciences in the 20th century. The goal of the project was
             not an encyclopedic catalog of all the major developments,
             but rather a sampling of some of the most interesting work.
             Of the 12 vignettes, 10 focus on particular areas of
             application: environmetrics, wildlife populations, animal
             breeding, human fertility, toxicology, medical diagnosis,
             clinical trials, environmental epidemiology, statistical
             genetics, and molecular biology. The two vignettes that
             begin the series focus more on methods that have had, or
             promise to have, impact across a range of subject matter
             areas: survival analysis and causal analysis.},
   Doi = {10.1080/01621459.2000.10473928},
   Key = {fds257884}
}

@article{fds257882,
   Author = {Dunson, DB and Haseman, JK and van Birgelen, AP and Stasiewicz, S and Tennant, RW},
   Title = {Statistical analysis of skin tumor data from Tg.AC mouse
             bioassays.},
   Journal = {Toxicological Sciences},
   Volume = {55},
   Number = {2},
   Pages = {293-302},
   Year = {2000},
   Month = {June},
   url = {http://dx.doi.org/10.1093/toxsci/55.2.293},
   Abstract = {New strategies for identifying chemical carcinogens and
             assessing risk have been proposed based on the Tg.AC
              (zeta-globin promoted v-Ha-ras) transgenic mouse. Preliminary
              studies suggest that the Tg.AC mouse bioassay may be an
             effective means of quickly evaluating the carcinogenic
             potential of a test agent. The skin of the Tg.AC mouse is
             genetically initiated, and the induction of epidermal
             papillomas in response to dermal or oral exposure to a
             chemical agent acts as a reporter phenotype of the activity
             of the test chemical. In Tg.AC mouse bioassays, the test
             agent is typically applied topically for up to 26 weeks, and
             the number of papillomas in the treated area is counted
             weekly. Statistical analyses are complicated by
             within-animal and serial dependency in the papilloma counts,
             survival differences between animals, and missing data. In
             this paper, we describe a statistical model for the analysis
             of skin tumor data from a Tg.AC mouse bioassay. The model
             separates effects on papilloma latency and multiplicity and
             accommodates important features of the data, including
             variability in expression of the transgene and dependency in
             the tumor counts. Methods are described for carcinogenicity
             testing and risk assessment. We illustrate our approach
              using data from a study of the effect of
              2,3,7,8-tetrachlorodibenzo-p-dioxin (TCDD) exposure on
             tumorigenesis.},
   Doi = {10.1093/toxsci/55.2.293},
   Key = {fds257882}
}

@article{fds258015,
   Author = {Dunson, DB},
   Title = {Assessing overall risk in reproductive experiments.},
   Journal = {Risk Analysis: An Official Publication of the Society for
              Risk Analysis},
   Volume = {20},
   Number = {4},
   Pages = {429-437},
   Year = {2000},
   Month = {August},
   url = {http://dx.doi.org/10.1111/0272-4332.204042},
   Abstract = {Toxicologists are often interested in assessing the joint
             effect of an exposure on multiple reproductive endpoints,
             including early loss, fetal death, and malformation.
             Exposures that occur prior to mating or extremely early in
             development can adversely affect the number of implantation
             sites or fetuses that form within each dam and may even
             prevent pregnancy. A simple approach for assessing overall
             adverse effects in such studies is to consider fetuses or
             implants that fail to develop due to exposure as missing
             data. The missing data can be imputed, and standard methods
             for the analysis of quantal response data can then be used
             for quantitative risk assessment or testing. In this
             article, a new bias-corrected imputation procedure is
             proposed and evaluated. The procedure is straightforward to
             implement in standard statistical packages and has excellent
             operating characteristics when used in combination with a
             marginal model fit with generalized estimating equations.
             The methods are applied to data from a reproductive toxicity
             study of Nitrofurazone conducted by the National Toxicology
             Program.},
   Doi = {10.1111/0272-4332.204042},
   Key = {fds258015}
}

@article{fds257887,
   Author = {Wilcox, AJ and Dunson, D and Baird, DD},
   Title = {The timing of the "fertile window" in the menstrual cycle:
             day specific estimates from a prospective
             study.},
   Journal = {BMJ (Clinical Research Ed.)},
   Volume = {321},
   Number = {7271},
   Pages = {1259-1262},
   Year = {2000},
   Month = {November},
   ISSN = {0959-8146},
   url = {http://dx.doi.org/10.1136/bmj.321.7271.1259},
   Abstract = {OBJECTIVES: To provide specific estimates of the likely
              occurrence of the six fertile days (the "fertile window")
              during the menstrual cycle. DESIGN: Prospective cohort
              study. PARTICIPANTS: 221 healthy women who were planning a
              pregnancy. MAIN OUTCOME MEASURES: The timing of ovulation
              in 696 menstrual cycles, estimated using urinary
              metabolites of oestrogen and progesterone. RESULTS: The
              fertile window occurred during a broad range of days in
              the menstrual cycle. On every day between days 6 and 21,
              women had at minimum a 10% probability of being in their
              fertile window. Women cannot predict a sporadic late
              ovulation; 4-6% of women whose cycles had not yet resumed
              were potentially fertile in the fifth week of their cycle.
              CONCLUSIONS: In only about 30% of women is the fertile
              window entirely within the days of the menstrual cycle
              identified by clinical guidelines, that is, between days
              10 and 17. Most women reach their fertile window earlier
              and others much later. Women should be advised that the
              timing of their fertile window can be highly
              unpredictable, even if their cycles are usually
              regular.},
   Doi = {10.1136/bmj.321.7271.1259},
   Key = {fds257887}
}

@article{fds258012,
   Author = {Dunson, DB and Tindall, KR},
   Title = {Bayesian analysis of mutational spectra.},
   Journal = {Genetics},
   Volume = {156},
   Number = {3},
   Pages = {1411-1418},
   Year = {2000},
   Month = {November},
   Abstract = {Studies that examine both the frequency of gene mutation and
             the pattern or spectrum of mutational changes can be used to
             identify chemical mutagens and to explore the molecular
             mechanisms of mutagenesis. In this article, we propose a
             Bayesian hierarchical modeling approach for the analysis of
             mutational spectra. We assume that the total number of
             independent mutations and the numbers of mutations falling
             into different response categories, defined by location
             within a gene and/or type of alteration, follow binomial and
             multinomial sampling distributions, respectively. We use
             prior distributions to summarize past information about the
             overall mutation frequency and the probabilities
             corresponding to the different mutational categories. These
             priors can be chosen on the basis of data from previous
             studies using an approach that accounts for heterogeneity
             among studies. Inferences about the overall mutation
             frequency, the proportions of mutations in each response
             category, and the category-specific mutation frequencies can
             be based on posterior distributions, which incorporate past
             and current data on the mutant frequency and on DNA sequence
             alterations. Methods are described for comparing groups and
             for assessing dose-related trends. We illustrate our
             approach using data from the literature.},
   Key = {fds258012}
}
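
The conjugate structure described in this abstract (beta prior with binomial mutant counts, Dirichlet prior with multinomial category counts) makes posterior computation direct. A sketch with invented counts and illustrative hyperparameters:

    # Conjugate posterior updating for a mutational spectrum.
    import numpy as np
    from scipy.stats import beta as beta_dist, dirichlet

    n_screened, n_mutant = 200000, 38            # hypothetical totals
    category_counts = np.array([18, 12, 8])      # hypothetical categories

    a0, b0 = 2.0, 10000.0                        # prior from "past studies"
    alpha0 = np.ones(3)

    post_freq = beta_dist(a0 + n_mutant, b0 + n_screened - n_mutant)
    post_cat = dirichlet(alpha0 + category_counts)
    print(post_freq.mean(), post_cat.mean())

    # Monte Carlo posterior for the category-specific mutation frequencies,
    # freq * prob_k, combining independent draws from the two posteriors.
    rng = np.random.default_rng(6)
    draws = post_freq.rvs(4000, random_state=rng)[:, None] \
            * post_cat.rvs(4000, random_state=rng)
    print(draws.mean(axis=0))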

@article{fds257886,
   Author = {Dunson, DB and Zhou, H},
   Title = {A Bayesian Model for Fecundability and Sterility},
   Journal = {Journal of the American Statistical Association},
   Volume = {95},
   Number = {452},
   Pages = {1054-1062},
   Publisher = {Informa UK Limited},
   Year = {2000},
   Month = {December},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2000.10474302},
   Abstract = {There is increasing evidence that exposure to environmental
             toxins during key stages of development can disrupt the
             human reproductive system. Such effects have proven
             difficult to study due to the many behavioral and biological
             factors involved in human reproduction. We analyze data from
             a North Carolina fertility study to assess the effect of
             prenatal, childhood, and current cigarette smoking exposure
             on fecundability and sterility. We use a mixture model that
             adjusts for timing and frequency of intercourse and allows
             both fecundability and sterility to depend on multiple
             covariates. We account for dependency among menstrual cycles
             within individual couples using a mixture density for a
             latent cycle viability variable. The mixture consists of a
             normal distribution describing heterogeneity among fecund
             couples with a point mass at 0 for sterile couples. The
             resulting distribution is more biologically plausible than
             the standard beta density. A Markov chain Monte Carlo scheme
             is used for Bayesian estimation of the model. There is some
             evidence that spontaneous intrauterine mortality results in
             decreased fecundability in subsequent cycles. Both current
             cigarette smoking and prenatal exposure of the woman to her
             mother's cigarette smoking are shown to be associated with a
             decrease in the probability of menstrual cycle viability. ©
             2000 Taylor & Francis Group, LLC.},
   Doi = {10.1080/01621459.2000.10474302},
   Key = {fds257886}
}
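
The latent cycle-viability density described here mixes a point mass at zero for sterile couples with a continuous component for fecund couples. A toy simulation of that mixture, with an invented mixing weight and normal component (the paper's covariate effects and MCMC are omitted, and the clipping to [0, 1] is an assumption of this sketch):

    # Point mass at zero plus a normal component for latent cycle viability.
    import numpy as np

    rng = np.random.default_rng(9)
    n_couples = 1000
    sterile = rng.binomial(1, 0.08, n_couples).astype(bool)   # invented rate
    latent = np.where(sterile, 0.0, rng.normal(0.3, 0.15, n_couples))
    viability = np.clip(latent, 0.0, 1.0)     # keep viability in [0, 1]
    print("fraction sterile:", sterile.mean())
    print("mean viability among fecund couples:",
          viability[~sterile].mean().round(3))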

@article{fds258011,
   Author = {Dunson, DB and Dinse, GE},
   Title = {Distinguishing effects on tumor multiplicity and growth rate
             in chemoprevention experiments.},
   Journal = {Biometrics},
   Volume = {56},
   Number = {4},
   Pages = {1068-1075},
   Year = {2000},
   Month = {December},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2000.01068.x},
   Abstract = {In some types of cancer chemoprevention experiments and
             short-term carcinogenicity bioassays, the data consist of
             the number of observed tumors per animal and the times at
             which these tumors were first detected. In such studies,
             there is interest in distinguishing between treatment
             effects on the number of tumors induced by a known
             carcinogen and treatment effects on the tumor growth rate.
             Since animals may die before all induced tumors reach a
             detectable size, separation of these effects can be
             difficult. This paper describes a flexible parametric model
             for data of this type. Under our model, the tumor detection
             times are realizations of a delayed Poisson process that is
             characterized by the age-specific tumor induction rate and a
             random latency interval between tumor induction and
             detection. The model accommodates distinct treatment and
             animal-specific effects on the number of induced tumors
             (multiplicity) and the time to tumor detection (growth
             rate). A Gibbs sampler is developed for estimation of the
             posterior distributions of the parameters. The methods are
             illustrated through application to data from a breast cancer
             chemoprevention experiment.},
   Doi = {10.1111/j.0006-341x.2000.01068.x},
   Key = {fds258011}
}
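
The delayed Poisson process in this model is easy to simulate: induced tumors arrive as a Poisson process, each becomes detectable after a random latency, and only detections before the end of follow-up are observed. A sketch with invented rates on a weekly time scale:

    # Simulate tumor detection times from a delayed Poisson process.
    import numpy as np

    rng = np.random.default_rng(7)

    def simulate_animal(rate=0.15, mean_latency=4.0, followup=26.0):
        n_induced = rng.poisson(rate * followup)          # tumors induced
        induction = rng.uniform(0, followup, n_induced)   # homogeneous process
        latency = rng.exponential(mean_latency, n_induced)
        detection = induction + latency
        return np.sort(detection[detection <= followup])  # observed only

    for i in range(5):
        print(f"animal {i}:", np.round(simulate_animal(), 1))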

@article{fds257892,
   Author = {Dunson, DB and Dinse, GE},
   Title = {Bayesian incidence analysis of animal tumorigenicity
             data},
   Journal = {Journal of the Royal Statistical Society: Series C
              (Applied Statistics)},
   Volume = {50},
   Number = {2},
   Pages = {125-141},
   Publisher = {WILEY},
   Year = {2001},
   Month = {January},
   url = {http://dx.doi.org/10.1111/1467-9876.00224},
   Abstract = {Statistical inference about tumorigenesis should focus on
             the tumour incidence rate. Unfortunately, in most animal
             carcinogenicity experiments, tumours are not observable in
             live animals and censoring of the tumour onset times is
             informative. In this paper, we propose a Bayesian method for
             analysing data from such studies. Our approach focuses on
             the incidence of tumours and accommodates occult tumours and
             censored onset times without restricting tumour lethality,
             relying on cause-of-death data, or requiring interim
             sacrifices. We represent the underlying state of nature by a
             multistate stochastic process and assume general probit
             models for the time-specific transition rates. These models
             allow the incorporation of covariates, historical control
             data and subjective prior information. The inherent
             flexibility of this approach facilitates the interpretation
             of results, particularly when the sample size is small or
             the data are sparse. We use a Gibbs sampler to estimate the
             relevant posterior distributions. The methods proposed are
             applied to data from a US National Toxicology Program
             carcinogenicity study.},
   Doi = {10.1111/1467-9876.00224},
   Key = {fds257892}
}

@article{fds257890,
   Author = {Dunson, DB and Perreault, SD},
   Title = {Factor analytic models of clustered multivariate data with
             informative censoring.},
   Journal = {Biometrics},
   Volume = {57},
   Number = {1},
   Pages = {302-308},
   Year = {2001},
   Month = {March},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2001.00302.x},
   Abstract = {This article describes a general class of factor analytic
             models for the analysis of clustered multivariate data in
             the presence of informative missingness. We assume that
             there are distinct sets of cluster-level latent variables
             related to the primary outcomes and to the censoring
             process, and we account for dependency between these latent
             variables through a hierarchical model. A linear model is
             used to relate covariates and latent variables to the
             primary outcomes for each subunit. A generalized linear
             model accounts for covariate and latent variable effects on
             the probability of censoring for subunits within each
             cluster. The model accounts for correlation within clusters
             and within subunits through a flexible factor analytic
             framework that allows multiple latent variables and
             covariate effects on the latent variables. The structure of
             the model facilitates implementation of Markov chain Monte
             Carlo methods for posterior estimation. Data from a
             spermatotoxicity study are analyzed to illustrate the
             proposed approach.},
   Doi = {10.1111/j.0006-341x.2001.00302.x},
   Key = {fds257890}
}

@article{fds257891,
   Author = {Dunson, DB},
   Title = {Modeling of changes in tumor burden},
   Journal = {Journal of Agricultural, Biological, and Environmental
             Statistics},
   Volume = {6},
   Number = {1},
   Pages = {38-48},
   Publisher = {Springer Nature},
   Year = {2001},
   Month = {March},
   url = {http://dx.doi.org/10.1198/108571101300325238},
   Abstract = {Skin painting studies on transgenic mice have recently been
             approved by the Food and Drug Administration (FDA) for
             carcinogenicity testing. Data consist of serial skin tumor
             counts on the backs of shaved mice in each of several dose
             groups. Current methods for assessing the tumorigenicity of
             test compounds are based on generalized estimating equations
             and require large samples. This paper proposes a new
             framework for modeling of the change over time in the
             papilloma burden in each mouse. A latent variable underlying
             the observed papilloma response is assumed to follow a
             generalized linear mixed-effects transition model. The model
             accounts for heterogeneity among animals and serial
             dependency in the skin tumor counts. Extensions of existing
             Markov chain Monte Carlo procedures for Bayesian estimation
             in generalized linear mixed models are proposed. The methods
             are applied to data from a National Toxicology Program
             short-term carcinogenicity study of lauric
             acid.},
   Doi = {10.1198/108571101300325238},
   Key = {fds257891}
}

@article{fds257894,
   Author = {Dunson, DB and Weinberg, CR and Baird, DD and Kesner, JS and Wilcox,
             AJ},
   Title = {Assessing human fertility using several markers of
             ovulation.},
   Journal = {Statistics in Medicine},
   Volume = {20},
   Number = {6},
   Pages = {965-978},
   Year = {2001},
   Month = {March},
   ISSN = {0277-6715},
   url = {http://dx.doi.org/10.1002/sim.716},
   Abstract = {In modelling human fertility one ideally accounts for timing
             of intercourse relative to ovulation. Measurement error in
             identifying the day of ovulation can bias estimates of
             fecundability parameters and attenuate estimates of
             covariate effects. In the absence of a single perfect marker
             of ovulation, several error prone markers are sometimes
             obtained. In this paper we propose a semi-parametric mixture
             model that uses multiple independent markers of ovulation to
             account for measurement error. The model assigns each method
             of assessing ovulation a distinct non-parametric error
             distribution, and corrects bias in estimates of day-specific
             fecundability. We use a Monte Carlo EM algorithm for joint
             estimation of (i) the error distribution for the markers,
             (ii) the error-corrected fertility parameters, and (iii) the
             couple-specific random effects. We apply the methods to data
             from a North Carolina fertility study to assess the
             magnitude of error in measures of ovulation based on urinary
             luteinizing hormone and metabolites of ovarian hormones, and
             estimate the corrected day-specific probabilities of
             clinical pregnancy. Published in 2001 by John Wiley & Sons,
             Ltd.},
   Doi = {10.1002/sim.716},
   Key = {fds257894}
}

@article{fds257893,
   Author = {Wilcox, AJ and Dunson, DB and Weinberg, CR and Trussell, J and Baird,
             DD},
   Title = {Likelihood of conception with a single act of intercourse:
             providing benchmark rates for assessment of post-coital
             contraceptives.},
   Journal = {Contraception},
   Volume = {63},
   Number = {4},
   Pages = {211-215},
   Year = {2001},
   Month = {April},
   ISSN = {0010-7824},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/11376648},
   Abstract = {Emergency post-coital contraceptives effectively reduce the
             risk of pregnancy, but their degree of efficacy remains
             uncertain. Measurement of efficacy depends on the pregnancy
             rate without treatment, which cannot be measured directly.
             We provide indirect estimates of such pregnancy rates, using
             data from a prospective study of 221 women who were
             attempting to conceive. We previously estimated the
             probability of pregnancy with an act of intercourse relative
             to ovulation. In this article, we extend these data to
             estimate the probability of pregnancy relative to
             intercourse on a given cycle day (counting from onset of
             previous menses). In assessing the efficacy of post-coital
             contraceptives, other approaches have not incorporated
             accurate information on the variability of ovulation. We
             find that the possibility of late ovulation produces a
             persistent risk of pregnancy even into the sixth week of the
             cycle. Post-coital contraceptives may be indicated even when
             intercourse has occurred late in the cycle.},
   Doi = {10.1016/s0010-7824(01)00191-3},
   Key = {fds257893}
}

@article{fds257895,
   Author = {Nyska, A and Lomnitski, L and Spalding, J and Dunson, DB and Goldsworthy, TL and Ben-Shaul, V and Grossman, S and Bergman, M and Boorman, G},
   Title = {Topical and oral administration of the natural water-soluble
             antioxidant from spinach reduces the multiplicity of
             papillomas in the Tg.AC mouse model.},
   Journal = {Toxicology Letters},
   Volume = {122},
   Number = {1},
   Pages = {33-44},
   Year = {2001},
   Month = {May},
   ISSN = {0378-4274},
   url = {http://dx.doi.org/10.1016/s0378-4274(01)00345-9},
   Abstract = {The Tg.AC mouse carrying the v-Ha-ras structural gene is a
             useful model for the study of chemical carcinogens,
             especially those acting via non-genotoxic mechanisms. This
             study evaluated the efficacy of the non-toxic, water-soluble
             antioxidant from spinach, natural antioxidant (NAO), in
             reducing skin papilloma induction in female hemizygous Tg.AC
             mice treated dermally five times over 2.5 weeks with 2.5
             microg 12-O-tetradecanoylphorbol-13-acetate (TPA). The
             TPA-only group was considered as a control; the other two
             groups received, additionally, NAO topically (2 mg) or
             orally (100 mg/kg), 5 days/week for 5 weeks. Papilloma
             counts made macroscopically during the clinical observations
             showed a significant decrease in multiplicity (P<0.01) in
             the NAO topically treated group. According to histological
              criteria, papilloma multiplicity was lower in both
             topical-NAO and oral-NAO groups, but significantly so only
             in the oral-NAO mice (P<0.01). The beneficial effect of NAO
             in the Tg.AC mouse is reported.},
   Doi = {10.1016/s0378-4274(01)00345-9},
   Key = {fds257895}
}

@article{fds257896,
   Author = {Dunson, DB and Baird, DD},
   Title = {A flexible parametric model for combining current status and
             age at first diagnosis data.},
   Journal = {Biometrics},
   Volume = {57},
   Number = {2},
   Pages = {396-403},
   Year = {2001},
   Month = {June},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2001.00396.x},
   Abstract = {In some cross-sectional studies of chronic disease, data
             consist of the age at examination, whether the disease was
             present at the exam, and recall of the age at first
             diagnosis. This article describes a flexible parametric
             approach for combining current status and age at first
             diagnosis data. We assume that the log odds of onset by a
             given age and of detection by a given age conditional on
             onset by that age are nondecreasing functions of time plus
             linear combinations of covariates. Piecewise linear models
             are used to characterize changes across time in the baseline
             odds. Methods are described for accommodating informatively
             missing current status data and inferences based on the
             age-specific incidence of disease prior to a landmark event
             (e.g., puberty, menopause). Our formulation enables
             straightforward maximum likelihood estimation without
             requiring restrictive parametric or Markov assumptions. The
             methods are applied to data from a study of uterine
             fibroids.},
   Doi = {10.1111/j.0006-341x.2001.00396.x},
   Key = {fds257896}
}
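
A piecewise linear, nondecreasing baseline on the log-odds scale can be parameterized with nonnegative slopes between knots, which is one simple way to realize the constraint described above. A sketch with invented knots and slopes:

    # Nondecreasing piecewise linear log odds of onset by age t.
    import numpy as np

    knots = np.array([0.0, 20.0, 30.0, 40.0, 50.0])       # illustrative knots
    slopes = np.array([0.02, 0.05, 0.10, 0.15])           # >= 0: nondecreasing
    intercept = -6.0

    def log_odds_onset(t, covariate_term=0.0):
        seg = np.clip(t[:, None] - knots[:-1], 0.0, np.diff(knots))
        return intercept + seg @ slopes + covariate_term

    ages = np.array([25.0, 35.0, 45.0])
    print(1 / (1 + np.exp(-log_odds_onset(ages))))        # Pr(onset by age)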

@article{fds257897,
   Author = {Dunson, DB},
   Title = {Commentary: practical advantages of Bayesian analysis of
             epidemiologic data.},
   Journal = {American Journal of Epidemiology},
   Volume = {153},
   Number = {12},
   Pages = {1222-1226},
   Year = {2001},
   Month = {June},
   url = {http://dx.doi.org/10.1093/aje/153.12.1222},
   Abstract = {In the past decade, there have been enormous advances in the
             use of Bayesian methodology for analysis of epidemiologic
             data, and there are now many practical advantages to the
             Bayesian approach. Bayesian models can easily accommodate
             unobserved variables such as an individual's true disease
             status in the presence of diagnostic error. The use of prior
             probability distributions represents a powerful mechanism
             for incorporating information from previous studies and for
             controlling confounding. Posterior probabilities can be used
             as easily interpretable alternatives to p values. Recent
             developments in Markov chain Monte Carlo methodology
             facilitate the implementation of Bayesian analyses of
             complex data sets containing missing observations and
             multidimensional outcomes. Tools are now available that
             allow epidemiologists to take advantage of this powerful
             approach to assessment of exposure-disease
             relations.},
   Doi = {10.1093/aje/153.12.1222},
   Key = {fds257897}
}
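
One of the points above, posterior probabilities as directly interpretable alternatives to p values, can be shown in a few lines: with beta priors on the disease risks in exposed and unexposed groups, Pr(risk is higher among the exposed | data) comes straight from posterior simulation. The 2x2 counts below are invented:

    # Posterior probability that exposure increases risk, under beta priors.
    import numpy as np

    rng = np.random.default_rng(8)
    d1, n1 = 30, 100      # hypothetical exposed: cases, total
    d0, n0 = 18, 100      # hypothetical unexposed: cases, total

    p1 = rng.beta(1 + d1, 1 + n1 - d1, 100000)   # uniform Beta(1,1) priors
    p0 = rng.beta(1 + d0, 1 + n0 - d0, 100000)
    print((p1 > p0).mean())                      # Pr(exposed risk is higher)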

@article{fds257898,
   Author = {Robbins, WA and Witt, KL and Haseman, JK and Dunson, DB and Troiani, L and Cohen, MS and Hamilton, CD and Perreault, SD and Libbus, B and Beyler,
             SA and Raburn, DJ and Tedder, ST and Shelby, MD and Bishop,
             JB},
   Title = {Antiretroviral therapy effects on genetic and morphologic
             end points in lymphocytes and sperm of men with human
             immunodeficiency virus infection.},
   Journal = {The Journal of Infectious Diseases},
   Volume = {184},
   Number = {2},
   Pages = {127-135},
   Year = {2001},
   Month = {July},
   ISSN = {0022-1899},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/11424008},
   Abstract = {Many human immunodeficiency virus (HIV)-infected persons
             receive prolonged treatment with DNA-reactive antiretroviral
             drugs. A prospective study was conducted of 26 HIV-infected
             men who provided samples before treatment and at multiple
             times after beginning treatment, to investigate effects of
             antiretrovirals on lymphocyte and sperm chromosomes and
             semen quality. Several antiretroviral regimens, all
             including a nucleoside component, were used. Lymphocyte
             metaphase analysis and sperm fluorescence in situ
             hybridization were used for cytogenetic studies. Semen
             analyses included conventional parameters (volume,
             concentration, viability, motility, and morphology). No
             significant effects on cytogenetic parameters, semen volume,
             or sperm concentration were detected. However, there were
             significant improvements in sperm motility for men with
             study entry CD4 cell counts >200 cells/mm(3), sperm
             morphology for men with entry CD4 cell counts < or =200
             cells/mm(3), and the percentage of viable sperm in both
             groups. These findings suggest that nucleoside-containing
             antiretrovirals administered via recommended protocols do
             not induce chromosomal changes in lymphocytes or sperm but
             may produce improvements in semen quality.},
   Doi = {10.1086/322002},
   Key = {fds257898}
}

@article{fds257899,
   Author = {Nyska, A and Lomnitski, L and Spalding, J and Dunson, DB and Goldsworthy, TL and Ben-Shaul, V and Grossman, S and Bergman, M and Boorman, G},
   Title = {Erratum: Topical and oral administration of the natural
             water-soluble antioxidant from spinach reduces the
             multiplicity of papillomas in the Tg.AC mouse model
             (Toxicology Letters (2001) 122 (33-44) PII:
             S0378427401003459)},
   Journal = {Toxicology Letters},
   Volume = {123},
   Number = {2-3},
   Pages = {237},
   Publisher = {Elsevier BV},
   Year = {2001},
   Month = {September},
   ISSN = {0378-4274},
   url = {http://dx.doi.org/10.1016/S0378-4274(01)00417-9},
   Doi = {10.1016/S0378-4274(01)00417-9},
   Key = {fds257899}
}

@article{fds257901,
   Author = {Wilcox, AJ and Baird, DD and Dunson, D and McChesney, R and Weinberg,
             CR},
   Title = {Natural limits of pregnancy testing in relation to the
             expected menstrual period.},
   Journal = {JAMA},
   Volume = {286},
   Number = {14},
   Pages = {1759-1761},
   Year = {2001},
   Month = {October},
   ISSN = {0098-7484},
   url = {http://dx.doi.org/10.1001/jama.286.14.1759},
   Abstract = {Pregnancy test kits routinely recommend testing "as early as
              the first day of the missed period." However, a pregnancy
              cannot be detected before the blastocyst implants. Due to
              natural variability in the timing of ovulation, implantation
              does not necessarily occur before the expected onset of next
              menses. The objective of this study was to estimate the
              maximum screening sensitivity of pregnancy tests when used
              on the first day of the expected period, taking into account
              the natural variability of ovulation and implantation. The
              design was a community-based prospective cohort study
              conducted in North Carolina between 1982 and 1986, enrolling
              221 healthy women 21 to 42 years of age who were planning to
              conceive. The main outcome measure was the day of
              implantation, defined by the serial assay of first morning
              urine samples using an extremely sensitive immunoradiometric
              assay for human chorionic gonadotropin (hCG), relative to
              the first day of the missed period, defined as the day on
              which women expected their next menses to begin, based on
              self-reported usual cycle length. Data were available for
              136 clinical pregnancies conceived during the study, 14
              (10%) of which had not yet implanted by the first day of the
              missed period. The highest possible screening sensitivity
              for an hCG-based pregnancy test therefore is estimated to be
              90% (95% confidence interval [CI], 84%-94%) on the first day
              of the missed period. By 1 week after the first day of the
              missed period, the highest possible screening sensitivity is
              estimated to be 97% (95% CI, 94%-99%). In this study, using
              an extremely sensitive assay for hCG, 10% of clinical
              pregnancies were undetectable on the first day of missed
              menses. In practice, an even larger percentage of clinical
              pregnancies may be undetected by current test kits on this
              day, given their reported assay properties and other
              practical limitations.},
   Doi = {10.1001/jama.286.14.1759},
   Key = {fds257901}
}
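
The headline numbers here are a simple binomial calculation: 122 of 136 pregnancies had implanted by the first day of the missed period, so the maximum screening sensitivity is 122/136, with an exact interval close to the reported 84%-94% (the paper's interval method may differ). A quick check using SciPy's exact Clopper-Pearson interval:

    # Reproduce the 90% (95% CI 84%-94%) sensitivity bound.
    from scipy.stats import binomtest

    print(122 / 136)                                        # ~0.897
    ci = binomtest(k=122, n=136).proportion_ci(confidence_level=0.95)
    print(ci.low, ci.high)                                  # ~0.84, ~0.94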

@article{fds257888,
   Author = {Dunson, DB and Sinai, I and Colombo, B},
   Title = {The relationship between cervical secretions and the daily
             probabilities of pregnancy: effectiveness of the TwoDay
             Algorithm.},
   Journal = {Human Reproduction},
   Volume = {16},
   Number = {11},
   Pages = {2278-2282},
   Year = {2001},
   Month = {November},
   ISSN = {0268-1161},
   url = {http://dx.doi.org/10.1093/humrep/16.11.2278},
   Abstract = {BACKGROUND: The TwoDay Algorithm is a simple method for
             identifying the fertile window. It classifies a day as
             fertile if cervical secretions are present on that day or
             were present on the day before. This approach may be an
             effective alternative to the ovulation and symptothermal
             methods for populations and programmes that find current
             natural family planning methods difficult to implement.
              METHODS: We used data on secretions from a large
             multinational European fecundability study to assess the
             relationship between the days predicted to be potentially
             fertile by the TwoDay Algorithm and the day-specific
             probabilities of pregnancy based on intercourse patterns in
              434 conception cycles from the study. RESULTS: The days
             around ovulation that had the highest fecundability were the
             days most likely to be classified as fertile by the TwoDay
             Algorithm. In addition, intercourse on a particular day in
             the fertile interval was twice as likely to result in a
             pregnancy if cervical secretions were present on that day or
              the day before. CONCLUSIONS: The TwoDay Algorithm is
             effective, both in identifying the fertile days of the cycle
             and in predicting days within the fertile interval that have
             a high pregnancy rate. Our data provide the first direct
             evidence that cervical secretions are associated with higher
             fecundability within the fertile window.},
   Doi = {10.1093/humrep/16.11.2278},
   Key = {fds257888}
}
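
The TwoDay Algorithm itself is a two-line rule, stated exactly in the abstract: a day is classified fertile if cervical secretions are present on that day or were present on the day before. A direct transcription, with a hypothetical secretions series:

    # TwoDay Algorithm: fertile if secretions today or yesterday.
    def two_day_fertile(secretions):
        """secretions: list of booleans, one per cycle day."""
        return [s or (i > 0 and secretions[i - 1])
                for i, s in enumerate(secretions)]

    cycle = [False, False, True, True, False, True, False, False]
    print(two_day_fertile(cycle))
    # -> [False, False, True, True, True, True, True, False]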

@article{fds257889,
   Author = {Dunson, DB},
   Title = {Bayesian modeling of the level and duration of fertility in
             the menstrual cycle.},
   Journal = {Biometrics},
   Volume = {57},
   Number = {4},
   Pages = {1067-1073},
   Year = {2001},
   Month = {December},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2001.01067.x},
   Abstract = {Time to pregnancy studies that identify ovulation days and
             collect daily intercourse data can be used to estimate the
             day-specific probabilities of conception given intercourse
             on a single day relative to ovulation. In this article, a
             Bayesian semiparametric model is described for flexibly
             characterizing covariate effects and heterogeneity among
             couples in daily fecundability. The proposed model is
             characterized by the timing of the most fertile day of the
             cycle relative to ovulation, by the probability of
             conception due to intercourse on the most fertile day, and
             by the ratios of the daily conception probabilities for
             other days of the cycle relative to this peak probability.
             The ratios are assumed to be increasing in time to the peak
             and decreasing thereafter. Generalized linear mixed models
             are used to incorporate covariate and couple-specific
             effects on the peak probability and on the day-specific
             ratios. A Markov chain Monte Carlo algorithm is described
             for posterior estimation, and the methods are illustrated
             through application to caffeine data from a North Carolina
             pregnancy study.},
   Doi = {10.1111/j.0006-341x.2001.01067.x},
   Key = {fds257889}
}

@article{fds257905,
   Author = {Dollé, MET and Snyder, WK and Dunson, DB and Vijg,
             J},
   Title = {Mutational fingerprints of aging.},
   Journal = {Nucleic Acids Research},
   Volume = {30},
   Number = {2},
   Pages = {545-549},
   Year = {2002},
   Month = {January},
   ISSN = {0305-1048},
   url = {http://dx.doi.org/10.1093/nar/30.2.545},
   Abstract = {Using a lacZ plasmid transgenic mouse model, spectra of
             spontaneous point mutations were determined in brain, heart,
             liver, spleen and small intestine in young and old mice.
             While similar at a young age, the mutation spectra among
             these organs were significantly different in old age. In
             brain and heart G:C-->A:T transitions at CpG sites were the
             predominant mutation, suggesting that oxidative damage is
             not a major mutagenic event in these tissues. Other base
             changes, especially those affecting A:T base pairs,
             positively correlated with increasing proliferative activity
             of the different tissues. A relatively high percentage of
             base changes at A:T base pairs and compound mutants were
             found in both spleen and spontaneous lymphoma, suggesting a
             possible role of the hypermutation process in splenocytes in
             carcinogenesis. The similar mutation spectra observed at a
             young age may reflect a common mutation mechanism for all
             tissues that could be driven by the rapid cell division that
             takes place during development. However, the spectra of the
             young tissues did not resemble that of the most
             proliferative aged tissue, implying that replicative history
             per se is not the underlying causal factor of age-related
             organ-specific differences in mutation spectra. Rather,
             differences in organ function, possibly in association with
             replicative history, may explain the divergence in mutation
             spectra during aging.},
   Doi = {10.1093/nar/30.2.545},
   Key = {fds257905}
}

@article{fds257902,
   Author = {Dunson, DB and Dinse, GE},
   Title = {Bayesian models for multivariate current status data with
             informative censoring.},
   Journal = {Biometrics},
   Volume = {58},
   Number = {1},
   Pages = {79-88},
   Year = {2002},
   Month = {March},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2002.00079.x},
   Abstract = {Multivariate current status data consist of indicators of
             whether each of several events occurs by the time of a single
             examination. Our interest focuses on inferences about the
             joint distribution of the event times. Conventional methods
             for analysis of multiple event-time data cannot be used
             because all of the event times are censored and censoring
             may be informative. Within a given subject, we account for
             correlated event times through a subject-specific latent
             variable, conditional upon which the various events are
             assumed to occur independently. We also assume that each
             event contributes independently to the hazard of censoring.
             Nonparametric step functions are used to characterize the
             baseline distributions of the different event times and of
             the examination times. Covariate and subject-specific
             effects are incorporated through generalized linear models.
             A Markov chain Monte Carlo algorithm is described for
             estimation of the posterior distributions of the unknowns.
             The methods are illustrated through application to multiple
             tumor site data from an animal carcinogenicity
             study.},
   Doi = {10.1111/j.0006-341x.2002.00079.x},
   Key = {fds257902}
}

@article{fds257904,
   Author = {Dunson, DB and Baird, DD},
   Title = {A proportional hazards model for incidence and induced
             remission of disease.},
   Journal = {Biometrics},
   Volume = {58},
   Number = {1},
   Pages = {71-78},
   Year = {2002},
   Month = {March},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2002.00071.x},
   Abstract = {To assess the protective effects of a time-varying
             covariate, we develop a stochastic model based on tumor
             biology. The model assumes that individuals have a
             Poisson-distributed pool of initiated clones, which progress
             through predetectable, detectable mortal and detectable
             immortal stages. Time-independent covariates are
             incorporated through a log-linear model for the expected
             number of clones, resulting in a proportional hazards model
             for disease onset. By allowing time-dependent covariates to
             induce clone death, with rate dependent on a clone's state,
             the model is flexible enough to accommodate delayed disease
             onset and remission or cure of preexisting disease.
             Inference uses Bayesian methods via Markov chain Monte
             Carlo. Theoretical properties are derived, and the approach
             is illustrated through analysis of the effects of childbirth
             on uterine leiomyoma (fibroids).},
   Doi = {10.1111/j.0006-341x.2002.00071.x},
   Key = {fds257904}
}

@article{fds257903,
   Author = {Dunson, DB and Colombo, B and Baird, DD},
   Title = {Changes with age in the level and duration of fertility in
             the menstrual cycle.},
   Journal = {Human Reproduction},
   Volume = {17},
   Number = {5},
   Pages = {1399-1403},
   Year = {2002},
   Month = {May},
   ISSN = {0268-1161},
   url = {http://dx.doi.org/10.1093/humrep/17.5.1399},
   Abstract = {BACKGROUND: Most analyses of age-related changes in fertility
             cannot separate effects due to reduced frequency of sexual
             intercourse from effects directly related to ageing.
             Information on intercourse collected daily through each
             menstrual cycle provides the data for estimating
             day-specific probabilities of pregnancy for specific days
             relative to ovulation, and these estimates allow
             unconfounded analysis of ageing effects. METHODS: A total of
             782 healthy couples using natural family planning methods
             contributed prospective data on 5860 menstrual cycles. Day
             of ovulation was based on basal body temperature
             measurements. Estimates of day-specific probabilities of
             pregnancy and the length of the fertile window were compared
             across age groups. RESULTS: Nearly all pregnancies occurred
             within a 6 day fertile window. There was no evidence for a
             shorter fertile window in older men or women. On average,
             the day-specific probabilities of pregnancy declined with
             age for women from the late 20s onward, with probabilities
             of pregnancy twice as high for women aged 19-26 years
             compared with women aged 35-39 years. Controlling for age of
             the woman, fertility was significantly reduced for men aged
             >35 years. CONCLUSIONS: Women's fertility begins to decline
             in the late 20s with substantial decreases by the late 30s.
             Fertility for men is less affected by age, but shows
             significant decline by the late 30s.},
   Doi = {10.1093/humrep/17.5.1399},
   Key = {fds257903}
}

@article{fds257906,
   Author = {Tiano, HF and Loftin, CD and Akunda, J and Lee, CA and Spalding, J and Sessoms, A and Dunson, DB and Rogan, EG and Morham, SG and Smart, RC and Langenbach, R},
   Title = {Deficiency of either cyclooxygenase (COX)-1 or COX-2 alters
             epidermal differentiation and reduces mouse skin
             tumorigenesis.},
   Journal = {Cancer Research},
   Volume = {62},
   Number = {12},
   Pages = {3395-3401},
   Year = {2002},
   Month = {June},
   Abstract = {Nonsteroidal anti-inflammatory drugs are widely reported to
             inhibit carcinogenesis in humans and in rodents. These drugs
             are believed to act by inhibiting one or both of the known
             isoforms of cyclooxygenase (COX). However, COX-2, and not
             COX-1, is the isoform most frequently reported to have a key
             role in tumor development. Here we report that homozygous
             deficiency of either COX-1 or COX-2 reduces skin
             tumorigenesis by 75% in a multistage mouse skin model.
             Reduced tumorigenesis was observed even though the levels of
             stable 7,12-dimethylbenz(a)anthracene-DNA adducts were
             increased about 2-fold in the COX-deficient mice compared
             with wild-type mice. The premature onset of keratinocyte
             terminal differentiation appeared to be the cellular event
             leading to the reduced tumorigenesis because keratin 1 and
             keratin 10, two keratins that indicate the commitment of
             keratinocytes to differentiate, were expressed 8-13-fold and
             10-20-fold more frequently in epidermal basal cells of the
             COX-1-deficient and COX-2-deficient mice, respectively, than
             in wild-type mice. Papillomas on the COX-deficient mice also
             displayed the premature onset of keratinocyte terminal
             differentiation. However, loricrin, a late marker of
             epidermal differentiation, was not significantly altered,
             suggesting that it was the early stages of keratinocyte
             differentiation that were primarily affected by COX
             deficiency. Because keratin 5, a keratin associated with
             basal cells, was detected differently in papillomas of
             COX-1-deficient as compared with COX-2-deficient mice, it
             appears that the isoforms do not have identical roles in
             papilloma development. Interestingly, apoptosis, a cellular
             process associated with nonsteroidal anti-inflammatory
             drug-induced inhibition of tumorigenesis, was not
             significantly altered in the epidermis or in papillomas of
             the COX-deficient mice. Thus, both COX-1 and COX-2 have
             roles in keratinocyte differentiation, and we propose that
             the absence of either isoform causes premature terminal
             differentiation of initiated keratinocytes and reduced tumor
             formation.},
   Key = {fds257906}
}

@article{fds257900,
   Author = {Mikolajczyk, R},
   Title = {TwoDay Algorithm in predicting fertile time.},
   Journal = {Human Reproduction},
   Volume = {17},
   Number = {7},
   Pages = {1925},
   Year = {2002},
   Month = {July},
   ISSN = {0268-1161},
   url = {http://dx.doi.org/10.1093/humrep/17.7.1925},
   Doi = {10.1093/humrep/17.7.1925},
   Key = {fds257900}
}

@article{fds257907,
   Author = {Zeise, L and Hattis, D and Andersen, M and Bailer, AJ and Bayard, S and Chen, C and Clewell, H and Conolly, R and Crump, K and Dunson, D and Finkel, A and Haber, L and Jarabek, AM and Kodell, R and Krewski, D and Thomas, D and Thorslund, T and Wassell, JT},
   Title = {Improving Risk Assessment: Research opportunities in dose
             response modeling to improve risk assessment},
   Journal = {Human and Ecological Risk Assessment: an International
             Journal},
   Volume = {8},
   Number = {6},
   Pages = {1421-1444},
   Publisher = {Informa UK Limited},
   Year = {2002},
   Month = {October},
   ISSN = {1080-7039},
   url = {http://dx.doi.org/10.1080/20028091057448},
   Abstract = {Substantial improvements in dose response modeling for risk
             assessment may result from recent and continuing advances in
             biological research, biochemical techniques,
             biostatistical/mathematical methods and computational power.
             This report provides a ranked set of recommendations for
             proposed research to advance the state of the art in dose
             response modeling. The report is the result of a meeting of
             invited workgroup participants charged with identifying five
             areas of research in dose response modeling that could be
             incorporated in a national agenda to improve risk assessment
             methods. Leading topics of emphasis are interindividual
             variability, injury risk assessment modeling, and procedures
             to incorporate distributional methods and mechanistic
             considerations into now-standard methods of deriving a
             reference dose (RfD), reference concentration (RfC), minimum
             risk level (MRL) or similar dose-response parameter
             estimates. © 2002 by ASP.},
   Doi = {10.1080/20028091057448},
   Key = {fds257907}
}

@article{fds257908,
   Author = {Dunson, DB and Baird, DD},
   Title = {Bayesian modeling of incidence and progression of disease
             from cross-sectional data.},
   Journal = {Biometrics},
   Volume = {58},
   Number = {4},
   Pages = {813-822},
   Year = {2002},
   Month = {December},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2002.00813.x},
   Abstract = {In the absence of longitudinal data, the current presence
             and severity of disease can be measured for a sample of
             individuals to investigate factors related to disease
             incidence and progression. In this article, Bayesian
             discrete-time stochastic models are developed for inference
             from cross-sectional data consisting of the age at first
             diagnosis, the current presence of disease, and one or more
             surrogates of disease severity. Semiparametric models are
             used for the age-specific hazards of onset and diagnosis,
             and a normal underlying variable approach is proposed for
             modeling of changes with latency time in disease severity.
             The model accommodates multiple surrogates of disease
             severity having different measurement scales and
             heterogeneity among individuals in disease progression. A
             Markov chain Monte Carlo algorithm is described for
             posterior computation, and the methods are applied to data
             from a study of uterine leiomyoma.},
   Doi = {10.1111/j.0006-341x.2002.00813.x},
   Key = {fds257908}
}

@article{fds257909,
   Author = {Baird, DD and Dunson, DB and Hill, MC and Cousins, D and Schectman,
             JM},
   Title = {High cumulative incidence of uterine leiomyoma in black and
             white women: ultrasound evidence.},
   Journal = {American Journal of Obstetrics and Gynecology},
   Volume = {188},
   Number = {1},
   Pages = {100-107},
   Year = {2003},
   Month = {January},
   url = {http://dx.doi.org/10.1067/mob.2003.99},
   Abstract = {OBJECTIVE: Uterine leiomyomas, or fibroid tumors, are the
             leading indication for hysterectomy in the United States,
             but the proportion of women in whom fibroid tumors develop
             is not known. This study screened for fibroid tumors,
             independently of clinical symptoms, to estimate the
             age-specific proportion of black and white women in whom
             fibroid tumors develop. STUDY DESIGN: Randomly selected
             members of an urban health plan who were 35 to 49 years old
             participated (n = 1364 women). Medical records and
             self-report were used to assess fibroid status for those
             women who were no longer menstruating (most of whom had had
             hysterectomies). Premenopausal women were screened by
             ultrasonography. We estimated the age-specific cumulative
             incidence of fibroid tumors for black and white women.
             RESULTS: Thirty-five percent of premenopausal women had a
             previous diagnosis of fibroid tumors. Fifty-one percent of
             the premenopausal women who had no previous diagnosis had
             ultrasound evidence of fibroid tumors. The estimated
             cumulative incidence of tumors by age 50 was >80% for black
             women and nearly 70% for white women. The difference between
             the age-specific cumulative incidence curves for black and
             white women was highly significant (odds ratio, 2.9; 95% CI,
             2.5-3.4; P < .001). CONCLUSION: The results of this study
             suggest that most black and white women in the United States
             develop uterine fibroid tumors before menopause and that
             uterine fibroid tumors develop in black women at earlier
             ages than in white women.},
   Doi = {10.1067/mob.2003.99},
   Key = {fds257909}
}

@article{fds257911,
   Author = {Chulada, PC and Arbes, SJ and Dunson, D and Zeldin,
             DC},
   Title = {Breast-feeding and the prevalence of asthma and wheeze in
             children: analyses from the Third National Health and
             Nutrition Examination Survey, 1988-1994.},
   Journal = {Journal of Allergy and Clinical Immunology},
   Volume = {111},
   Number = {2},
   Pages = {328-336},
   Year = {2003},
   Month = {February},
   url = {http://dx.doi.org/10.1067/mai.2003.127},
   Abstract = {BACKGROUND: Asthma prevalence has increased dramatically in
             recent years, especially among children. Breast-feeding
             might protect children against asthma and related conditions
             (recurrent wheeze), and this protective effect might depend
             on the duration and exclusivity of the breast-feeding
             regimen. OBJECTIVE: We sought to determine whether there is
             an association between breast-feeding and asthma, recurrent
             wheeze, or both in children up to 72 months of age and
             whether the duration and exclusivity of breast-feeding
             affect this association. METHODS: Data were from the third
             National Health and Nutrition Examination Survey, a
             nationally representative cross-sectional survey conducted
             from 1988 to 1994. We tested for significant associations
             between breast-feeding and physician-diagnosed asthma and
             recurrent wheeze (≥3 episodes in the past 12 months)
             before and after adjusting for potential confounders.
             RESULTS: Crude analyses showed that breast-feeding was
             associated with significantly reduced risks for asthma and
             recurrent wheeze in children 2 to 71 months of age, but
             after adjusting for potential confounders, these overall
             protective associations attenuated and were no longer
             statistically significant. However, 2 new and important
             associations were revealed after adjusting for confounders:
             (1) compared with never breast-fed children, ever breast-fed
             children had significantly reduced odds of being diagnosed
             with asthma and of having recurrent wheeze before 24 months
             of age, and (2) among children 2 to 71 months of age who had
             been exposed to environmental tobacco smoke, those who had
             ever been breast-fed had significantly reduced risks of
             asthma and wheeze compared with those who had never been
             breast-fed. CONCLUSIONS: Breast-feeding might delay the
             onset of or actively protect children less than 24 months of
             age against asthma and recurrent wheeze. Breast-feeding
             might reduce the prevalence of asthma and recurrent wheeze
             in children exposed to environmental tobacco
             smoke.},
   Doi = {10.1067/mai.2003.127},
   Key = {fds257911}
}

@article{fds257910,
   Author = {Dunson, DB and Colombo, B},
   Title = {Bayesian modeling of markers of day-specific
             fertility},
   Journal = {Journal of the American Statistical Association},
   Volume = {98},
   Number = {461},
   Pages = {28-37},
   Publisher = {Informa UK Limited},
   Year = {2003},
   Month = {March},
   url = {http://dx.doi.org/10.1198/016214503388619067},
   Abstract = {Cervical mucus hydration increases during the fertile
             interval before ovulation. Because sperm can only penetrate
             mucus having a high water content, cervical secretions
             provide a reliable marker of the fertile days of the
             menstrual cycle. This article develops a Bayesian approach
             for modeling of daily observations of cervical mucus and
             applies the approach to assess heterogeneity among women and
             cycles from a given woman with respect to the increase in
             mucus hydration during the fertile interval. The proposed
             model relates the mucus observations to an underlying normal
             mucus hydration score, which varies relative to a peak
             hydration day. Uncertainty in the timing of the peak is
             accounted for, and a novel weighted mixture model is used to
             characterize heterogeneity in distinct features of the
             underlying mean function. Prior information on the mucus
             hydration trajectory is incorporated, and a Markov chain
             Monte Carlo approach is developed. Based on data from a
             study of daily fecundability, there appears to be
             substantial heterogeneity among women in detected
             preovulatory increases in mucus hydration, but only minimal
             differences among cycles from a given woman.},
   Doi = {10.1198/016214503388619067},
   Key = {fds257910}
}

@article{fds257912,
   Author = {Dunson, DB and Chulada, P and Arbes, SJ},
   Title = {Bayesian modeling of time-varying and waning exposure
             effects.},
   Journal = {Biometrics},
   Volume = {59},
   Number = {1},
   Pages = {83-91},
   Year = {2003},
   Month = {March},
   url = {http://dx.doi.org/10.1111/1541-0420.00010},
   Abstract = {In epidemiologic studies, there is often interest in
             assessing the association between exposure history and
             disease incidence. For many diseases, incidence may depend
             not only on cumulative exposure, but also on the ages at
             which exposure occurred. This article proposes a flexible
             Bayesian approach for modeling age-varying and waning
             exposure effects. The Cox model is generalized to allow the
             hazard of disease to depend on an integral, across the
             exposed ages, of a piecewise polynomial function of age,
             multiplied by an exponential decay term. Linearity
             properties of the model facilitate posterior computation via
             a Gibbs sampler, which generalizes previous algorithms for
             Cox regression with time-dependent covariates. The approach
             is illustrated by an application to the study of protective
             effects of breastfeeding on incidence of childhood
             asthma.},
   Doi = {10.1111/1541-0420.00010},
   Key = {fds257912}
}

@article{fds257913,
   Author = {Baird, DD and Dunson, DB},
   Title = {Why is parity protective for uterine fibroids?},
   Journal = {Epidemiology (Cambridge, Mass.)},
   Volume = {14},
   Number = {2},
   Pages = {247-250},
   Year = {2003},
   Month = {March},
   url = {http://dx.doi.org/10.1097/01.ede.0000054360.61254.27},
   Abstract = {Uterine fibroids are benign tumors, the etiology of which is
             not understood. Symptoms can be debilitating, and the
             primary treatment is surgery, usually hysterectomy.
             Epidemiologic data show that pregnancy is associated with
             reduced risk of fibroids. We hypothesize that this
             association is attributable to a protective effect of
             postpartum involution of the uterus. After each pregnancy
             the uterus rapidly returns to prepregnancy size by dramatic
             remodeling of the tissue. We hypothesize that small fibroids
             are eliminated during this process. We present preliminary
             epidemiologic evidence that is consistent with this
             hypothesis. If the hypothesis is supported by more direct
             evidence, it may have broader implications, supporting the
             idea that tissue remodeling may be a general mechanism for
             limiting tumor development.},
   Doi = {10.1097/01.ede.0000054360.61254.27},
   Key = {fds257913}
}

@article{fds257914,
   Author = {Dunson, DB},
   Title = {Incorporating heterogeneous intercourse records into time to
             pregnancy models},
   Journal = {Mathematical Population Studies},
   Volume = {10},
   Number = {2},
   Pages = {127-143},
   Publisher = {Informa UK Limited},
   Year = {2003},
   Month = {April},
   ISSN = {0889-8480},
   url = {http://dx.doi.org/10.1080/08898480306714},
   Abstract = {Information on the timing of intercourse relative to
             ovulation can be incorporated into time to pregnancy models
             to improve the power to detect covariate effects, to
             estimate the day-specific conception probabilities, and to
             distinguish between biological and behavioral effects on
             fecundability, defined as the probability of conception
             in a menstrual cycle. In this paper, Bayesian methods are
             proposed for joint modeling of intercourse behavior and
             biologic fecundability. The model accommodates a sterile
             subpopulation of couples, general covariate effects, and
             heterogeneity among fecund couples in menstrual cycle
             viability and in frequency of unprotected intercourse.
             Methods are described for incorporating cycles with varying
             amounts of intercourse information into a single analysis. A
             Markov chain Monte Carlo algorithm is outlined for
             estimation of the posterior distributions of the unknowns.
             The methods are applied to data from a North Carolina study
             of couples attempting pregnancy. Copyright © 2003 Taylor &
             Francis.},
   Doi = {10.1080/08898480306714},
   Key = {fds257914}
}

@article{fds257915,
   Author = {Stanford, JB and Smith, KR and Dunson, DB},
   Title = {Vulvar mucus observations and the probability of
             pregnancy.},
   Journal = {Obstetrics and Gynecology},
   Volume = {101},
   Number = {6},
   Pages = {1285-1293},
   Year = {2003},
   Month = {June},
   url = {http://dx.doi.org/10.1016/s0029-7844(03)00358-2},
   Abstract = {OBJECTIVE: To assess the day-specific and cycle-specific
             probabilities of conception leading to clinical pregnancy,
             in relation to the timing of intercourse and vulvar mucus
             observations. METHODS: This was a retrospective cohort study
             of women beginning use of the Creighton Model Fertility Care
             System in Missouri, Nebraska, Kansas, and California. Data
             were abstracted from Creighton Model Fertility Care System
             records, including women's daily standardized vulvar
             observations of cervical mucus discharge, days of
             intercourse, and clinically evident pregnancy (conception).
             Established statistical models were used to estimate
             day-specific probabilities of conception. RESULTS: Data were
             analyzed from 1681 cycles with 81 conceptions from 309
             normally fertile couples (initially seeking to avoid
             pregnancy) and from 373 cycles with 30 conceptions from 117
             subfertile couples (who were initially trying to achieve
             pregnancy). The highest probability of pregnancy occurred on
             the peak day of vulvar mucus observation (0.38 for normally
             fertile couples and 0.14 for subfertile couples). The
             probability of pregnancy was greater than 0.05 for normally
             fertile couples from 3 days before to 2 days after the peak,
             and for subfertile couples from 1 day before to 1 day after
             the peak. The cycle-specific probability of conception
             correlated with the quality of mucus discharge in normally
             fertile couples but not in subfertile couples. CONCLUSION:
             Standardized vulvar observations of vaginal mucus discharge
             identify the days with the greatest likelihood of conception
             from intercourse in normal fertility and subfertility and
             provide an indicator of the overall potential for conception
             in a given menstrual cycle in normal fertility.},
   Doi = {10.1016/s0029-7844(03)00358-2},
   Key = {fds257915}
}

@article{fds257917,
   Author = {Dunson, DB and Watson, M and Taylor, JA},
   Title = {Bayesian latent variable models for median regression on
             multiple outcomes.},
   Journal = {Biometrics},
   Volume = {59},
   Number = {2},
   Pages = {296-304},
   Year = {2003},
   Month = {June},
   url = {http://dx.doi.org/10.1111/1541-0420.00036},
   Abstract = {Often a response of interest cannot be measured directly and
             it is necessary to rely on multiple surrogates, which can be
             assumed to be conditionally independent given the latent
             response and observed covariates. Latent response models
             typically assume that residual densities are Gaussian. This
             article proposes a Bayesian median regression modeling
             approach, which avoids parametric assumptions about residual
             densities by relying on an approximation based on quantiles.
             To accommodate within-subject dependency, the quantile
             response categories of the surrogate outcomes are related to
             underlying normal variables, which depend on a latent normal
             response. This underlying Gaussian covariance structure
             simplifies interpretation and model fitting, without
             restricting the marginal densities of the surrogate
             outcomes. A Markov chain Monte Carlo algorithm is proposed
             for posterior computation, and the methods are applied to
             single-cell electrophoresis (comet assay) data from a
             genetic toxicology study.},
   Doi = {10.1111/1541-0420.00036},
   Key = {fds257917}
}

@article{fds257918,
   Author = {Dunson, DB and Neelon, B},
   Title = {Bayesian inference on order-constrained parameters in
             generalized linear models.},
   Journal = {Biometrics},
   Volume = {59},
   Number = {2},
   Pages = {286-295},
   Year = {2003},
   Month = {June},
   url = {http://dx.doi.org/10.1111/1541-0420.00035},
   Abstract = {In biomedical studies, there is often interest in assessing
             the association between one or more ordered categorical
             predictors and an outcome variable, adjusting for
             covariates. For a k-level predictor, one typically uses
             either a k-1 degree of freedom (df) test or a single df
             trend test, which requires scores for the different levels
             of the predictor. In the absence of knowledge of a
             parametric form for the response function, one can
             incorporate monotonicity constraints to improve the
             efficiency of tests of association. This article proposes a
             general Bayesian approach for inference on order-constrained
             parameters in generalized linear models. Instead of choosing
             a prior distribution with support on the constrained space,
             which can result in major computational difficulties, we
             propose to map draws from an unconstrained posterior density
             using an isotonic regression transformation. This approach
             allows flat regions over which increases in the level of a
             predictor have no effect. Bayes factors for assessing
             ordered trends can be computed based on the output from a
             Gibbs sampling algorithm. Results from a simulation study
             are presented and the approach is applied to data from a
             time-to-pregnancy study.},
   Doi = {10.1111/1541-0420.00035},
   Key = {fds257918}
}

@article{fds257916,
   Author = {Dunson, DB and Chen, Z and Harry, J},
   Title = {A Bayesian approach for joint modeling of cluster size and
             subunit-specific outcomes.},
   Journal = {Biometrics},
   Volume = {59},
   Number = {3},
   Pages = {521-530},
   Year = {2003},
   Month = {September},
   url = {http://dx.doi.org/10.1111/1541-0420.00062},
   Abstract = {In applications that involve clustered data, such as
             longitudinal studies and developmental toxicity experiments,
             the number of subunits within a cluster is often correlated
             with outcomes measured on the individual subunits. Analyses
             that ignore this dependency can produce biased inferences.
             This article proposes a Bayesian framework for jointly
             modeling cluster size and multiple categorical and
             continuous outcomes measured on each subunit. We use a
             continuation ratio probit model for the cluster size and
             underlying normal regression models for each of the
             subunit-specific outcomes. Dependency between cluster size
             and the different outcomes is accommodated through a latent
             variable structure. The form of the model facilitates
             posterior computation via a simple and computationally
             efficient Gibbs sampler. The approach is illustrated with an
             application to developmental toxicity data, and other
             applications, to joint modeling of longitudinal and event
             time data, are discussed.},
   Doi = {10.1111/1541-0420.00062},
   Key = {fds257916}
}

@article{fds257919,
   Author = {Dunson, DB},
   Title = {Dynamic Latent Trait Models for Multidimensional
             Longitudinal Data},
   Journal = {Journal of the American Statistical Association},
   Volume = {98},
   Number = {463},
   Pages = {555-563},
   Publisher = {Informa UK Limited},
   Year = {2003},
   Month = {September},
   url = {http://dx.doi.org/10.1198/016214503000000387},
   Abstract = {This article presents a new approach for analysis of
             multidimensional longitudinal data, motivated by studies
             using an item response battery to measure traits of an
             individual repeatedly over time. A general modeling
             framework is proposed that allows mixtures of count,
             categorical, and continuous response variables. Each
             response is related to age-specific latent traits through a
             generalized linear model that accommodates item-specific
             measurement errors. A transition model allows the latent
             traits at a given age to depend on observed predictors and
             on previous latent traits for that individual. Following a
             Bayesian approach to inference, a Markov chain Monte Carlo
             algorithm is proposed for posterior computation. The methods
             are applied to data from a neurotoxicity study of the
             pesticide methoxychlor, and evidence of a dose-dependent
             increase in motor activity is presented.},
   Doi = {10.1198/016214503000000387},
   Key = {fds257919}
}

@article{fds257920,
   Author = {Dunson, DB and Herring, AH},
   Title = {Bayesian inferences in the Cox model for order-restricted
             hypotheses.},
   Journal = {Biometrics},
   Volume = {59},
   Number = {4},
   Pages = {916-923},
   Year = {2003},
   Month = {December},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2003.00106.x},
   Abstract = {In studying the relationship between an ordered categorical
             predictor and an event time, it is standard practice to
             include dichotomous indicators of the different levels of
             the predictor in a Cox model. One can then use a multiple
             degree-of-freedom score or partial likelihood ratio test for
             hypothesis testing. Often, interest focuses on comparing the
             null hypothesis of no difference to an order-restricted
             alternative, such as a monotone increase across levels of a
             predictor. This article proposes a Bayesian approach for
             addressing hypotheses of this type. We reparameterize the
             Cox model in terms of a cumulative product of parameters
             having conjugate prior densities, consisting of mixtures of
             point masses at one, and truncated gamma densities. Due to
             the structure of the model, posterior computation can
             proceed via a simple and efficient Gibbs sampling algorithm.
             Posterior probabilities for the global null hypothesis and
             subhypotheses, comparing the hazards for specific groups,
             can be calculated directly from the output of a single Gibbs
             chain. The approach allows for level sets across which a
             predictor has no effect. Generalizations to multiple
             predictors are described, and the method is applied to a
             study of emergency medical treatment for
             stroke.},
   Doi = {10.1111/j.0006-341x.2003.00106.x},
   Key = {fds257920}
}

@article{fds257921,
   Author = {Chen, Z and Dunson, DB},
   Title = {Random effects selection in linear mixed
             models.},
   Journal = {Biometrics},
   Volume = {59},
   Number = {4},
   Pages = {762-769},
   Year = {2003},
   Month = {December},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2003.00089.x},
   Abstract = {We address the important practical problem of how to select
             the random effects component in a linear mixed model. A
             hierarchical Bayesian model is used to identify any random
             effect with zero variance. The proposed approach
             reparameterizes the mixed model so that functions of the
             covariance parameters of the random effects distribution are
             incorporated as regression coefficients on standard normal
             latent variables. We allow random effects to effectively
             drop out of the model by choosing mixture priors with point
             mass at zero for the random effects variances. Due to the
             reparameterization, the model enjoys a conditionally linear
             structure that facilitates the use of normal conjugate
             priors. We demonstrate that posterior computation can
             proceed via a simple and efficient Markov chain Monte Carlo
             algorithm. The methods are illustrated using simulated data
             and real data from a study relating prenatal exposure to
             polychlorinated biphenyls and psychomotor development of
             children.},
   Doi = {10.1111/j.0006-341x.2003.00089.x},
   Key = {fds257921}
}

@article{fds257822,
   Author = {Trouba, K and Nyska, A and Styblo, M and Dunson, D and Lomnitski, L and Grossman, S and Moser, G and Suttie, A and Patterson, R and Walton, F and Germolec, D},
   Title = {Effect of antioxidants on the papilloma response and liver
             glutathione modulation mediated by arsenic in Tg.AC
             transgenic mice},
   Journal = {Arsenic Exposure and Health Effects V},
   Pages = {283-293},
   Publisher = {Elsevier},
   Year = {2003},
   Month = {December},
   url = {http://dx.doi.org/10.1016/B978-044451441-7/50022-1},
   Abstract = {Epidemiological studies indicate that inorganic arsenicals
             produce various skin lesions as well as skin, lung, bladder,
             liver, prostate, and renal cancer. Our laboratory previously
             demonstrated that low-dose 12-O-tetradecanoylphorbol-13-acetate
             (TPA) increased the number of skin papillomas in Tg.AC
             transgenic mice that received sodium arsenite in drinking
             water, an effect dependent on proinflammatory cytokines.
             Because proinflammatory cytokine expression can be modulated
             by free radicals and oxidative stress, we hypothesized that
             oxidative stress contributes to TPA-promoted papilloma
             development in Tg.AC mice exposed to sodium arsenite. To
             evaluate the contribution of oxidative stress to arsenic
             skin carcinogenesis, two free-radical scavengers were tested
             for their ability to suppress papilloma responses (e.g.
             induction, latency, and multiplicity) modulated by arsenite
             in Tg.AC mice. Data indicate that arsenite increased
             papilloma responses in TPA-promoted Tg.AC mice as compared
             to control animals (no arsenite). The antioxidant vitamin E
             or a water-soluble natural antioxidant fraction from spinach
             had no inhibitory effect on TPA-promoted papilloma responses
             following arsenite exposure. Although not conclusively
             defined by our studies, oxidative stress generated by
             arsenic may contribute to skin carcinogenesis; however, it
             is not likely to be the sole or primary mechanism that
             enhances papilloma responses following arsenite exposure and
             TPA promotion. © 2003 Elsevier B.V.},
   Doi = {10.1016/B978-044451441-7/50022-1},
   Key = {fds257822}
}

@article{fds257922,
   Author = {Tingen, C and Stanford, JB and Dunson, DB},
   Title = {Methodologic and statistical approaches to studying human
             fertility and environmental exposure.},
   Journal = {Environmental Health Perspectives},
   Volume = {112},
   Number = {1},
   Pages = {87-93},
   Year = {2004},
   Month = {January},
   url = {http://dx.doi.org/10.1289/ehp.6263},
   Abstract = {Although there has been growing concern about the effects of
             environmental exposures on human fertility, standard
             epidemiologic study designs may not collect sufficient data
             to identify subtle effects while properly adjusting for
             confounding. In particular, results from conventional time
             to pregnancy studies can be driven by the many sources of
             bias inherent in these studies. By prospectively collecting
             detailed records of menstrual bleeding, occurrences of
             intercourse, and a marker of ovulation day in each menstrual
             cycle, precise information on exposure effects can be
             obtained, adjusting for many of the primary sources of bias.
             This article provides an overview of the different types of
             study designs, focusing on the data required, the practical
             advantages and disadvantages of each design, and the
             statistical methods required to take full advantage of the
             available data. We conclude that detailed prospective
             studies allowing inferences on day-specific probabilities of
             conception should be considered as the gold standard for
             studying the effects of environmental exposures on
             fertility.},
   Doi = {10.1289/ehp.6263},
   Key = {fds257922}
}

@article{fds257923,
   Author = {Dunson, DB and Baird, DD and Colombo, B},
   Title = {Increased infertility with age in men and
             women.},
   Journal = {Obstetrics and Gynecology},
   Volume = {103},
   Number = {1},
   Pages = {51-56},
   Year = {2004},
   Month = {January},
   url = {http://dx.doi.org/10.1097/01.aog.0000100153.24061.45},
   Abstract = {OBJECTIVE: To estimate the effects of aging on the
             percentage of outwardly healthy couples who are sterile
             (completely unable to conceive without assisted
             reproduction) or infertile (unable to conceive within a year
             of unprotected intercourse). METHODS: A prospective
             fecundability study was conducted in a sample of 782 couples
             recruited from 7 European centers for natural family
             planning. Women aged 18-40 years were eligible. Daily
             intercourse records were used to adjust for timing and
             frequency of intercourse when estimating the
             per-menstrual-cycle probability of conception. The number of
             menstrual cycles required to conceive a clinical pregnancy
             and the probability of sterility and infertility were
             derived from the estimated fecundability distributions for
             men and women of different ages. RESULTS: Sterility was
             estimated at about 1%; this percent did not change with age.
             The percentage infertility was estimated at 8% for women
             aged 19-26 years, 13-14% for women aged 27-34 years and 18%
             for women aged 35-39 years. Starting in the late 30s, male
             age was an important factor, with the percentage failing to
             conceive within 12 cycles increasing from an estimated
             18-28% between ages 35 and 40 years. The estimated
             percentage of infertile couples that would be able to
             conceive after an additional 12 cycles of trying varied from
             43-63% depending on age. CONCLUSION: Increased infertility
             in older couples is attributable primarily to declines in
             fertility rates rather than to absolute sterility. Many
             infertile couples will conceive if they try for an
             additional year.},
   Doi = {10.1097/01.aog.0000100153.24061.45},
   Key = {fds257923}
}

@article{fds257924,
   Author = {Bigelow, JL and Dunson, DB and Stanford, JB and Ecochard, R and Gnoth,
             C and Colombo, B},
   Title = {Mucus observations in the fertile window: a better predictor
             of conception than timing of intercourse.},
   Journal = {Human Reproduction},
   Volume = {19},
   Number = {4},
   Pages = {889-892},
   Year = {2004},
   Month = {April},
   url = {http://dx.doi.org/10.1093/humrep/deh173},
   Abstract = {BACKGROUND: Intercourse results in a pregnancy essentially
             only if it occurs during the 6-day fertile interval ending
             on the day of ovulation. The strong association between
             timing of intercourse within this interval and the
             probability of conception typically is attributed to limited
             sperm and egg lifetimes. METHODS: A total of 782 women
             recruited from natural family planning centres in Europe
             contributed prospective data on 7288 menstrual cycles. Daily
             records of intercourse, basal body temperature and vaginal
             discharge of cervical mucus were collected. Probabilities of
             conception were estimated according to the timing of
             intercourse relative to ovulation and a 1-4 score of mucus
             quality. RESULTS: There was a strong increasing trend in the
             day-specific probabilities of pregnancy with increases in
             the mucus score. Adjusting for the mucus score, the
             day-specific probabilities had limited variability across
             the fertile interval. CONCLUSIONS: Changes in mucus quality
             across the fertile interval predict the observed pattern in
             the day-specific probabilities of conception. To maximize
             the likelihood of conception, intercourse should occur on
             days with optimal mucus quality, as observed in vaginal
             discharge, regardless of the exact timing relative to
             ovulation.},
   Doi = {10.1093/humrep/deh173},
   Key = {fds257924}
}

@article{fds257925,
   Author = {Neelon, B and Dunson, DB},
   Title = {Bayesian isotonic regression and trend analysis.},
   Journal = {Biometrics},
   Volume = {60},
   Number = {2},
   Pages = {398-406},
   Year = {2004},
   Month = {June},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2004.00184.x},
   Abstract = {In many applications, the mean of a response variable can be
             assumed to be a nondecreasing function of a continuous
             predictor, controlling for covariates. In such cases,
             interest often focuses on estimating the regression
             function, while also assessing evidence of an association.
             This article proposes a new framework for Bayesian isotonic
             regression and order-restricted inference. Approximating the
             regression function with a high-dimensional piecewise linear
             model, the nondecreasing constraint is incorporated through
             a prior distribution for the slopes consisting of a product
             mixture of point masses (accounting for flat regions) and
             truncated normal densities. To borrow information across the
             intervals and smooth the curve, the prior is formulated as a
             latent autoregressive normal process. This structure
             facilitates efficient posterior computation, since the full
             conditional distributions of the parameters have simple
             conjugate forms. Point and interval estimates of the
             regression function and posterior probabilities of an
             association for different regions of the predictor can be
             estimated from a single MCMC run. Generalizations to
             categorical outcomes and multiple predictors are described,
             and the approach is applied to an epidemiology
             application.},
   Doi = {10.1111/j.0006-341x.2004.00184.x},
   Key = {fds257925}
}

@article{fds257926,
   Author = {Chen, Z and Dunson, DB},
   Title = {Bayesian estimation of survival functions under stochastic
             precedence.},
   Journal = {Lifetime Data Analysis},
   Volume = {10},
   Number = {2},
   Pages = {159-173},
   Year = {2004},
   Month = {June},
   url = {http://dx.doi.org/10.1023/b:lida.0000030201.12943.13},
   Abstract = {When estimating the distributions of two random variables, X
             and Y, investigators often have prior information that Y
             tends to be bigger than X. To formalize this prior belief,
             one could potentially assume stochastic ordering between X
             and Y, which implies Pr(X ≤ z) ≥ Pr(Y ≤ z)
             for all z in the domain of X and Y. Stochastic ordering is
             quite restrictive, though, and this article focuses instead
             on Bayesian estimation of the distribution functions of X
             and Y under the weaker stochastic precedence constraint,
             Pr(X ≤ Y) ≥ 0.5. We consider the case where both X
             and Y are categorical variables with common support and
             develop a Gibbs sampling algorithm for posterior
             computation. The method is then generalized to the case
             where X and Y are survival times. The proposed approach is
             illustrated using data on survival after tumor removal for
             patients with malignant melanoma.},
   Doi = {10.1023/b:lida.0000030201.12943.13},
   Key = {fds257926}
}

@article{fds257927,
   Author = {Dunson, DB and Chen, Z},
   Title = {Selecting factors predictive of heterogeneity in
             multivariate event time data.},
   Journal = {Biometrics},
   Volume = {60},
   Number = {2},
   Pages = {352-358},
   Year = {2004},
   Month = {June},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2004.00179.x},
   Abstract = {In multivariate survival analysis, investigators are often
             interested in testing for heterogeneity among clusters, both
             overall and within specific classes. We represent different
             hypotheses about the heterogeneity structure using a
             sequence of gamma frailty models, ranging from a null model
             with no random effects to a full model having random effects
             for each class. Following a Bayesian approach, we define
             prior distributions for the frailty variances consisting of
             mixtures of point masses at zero and inverse-gamma
             densities. Since frailties with zero variance effectively
             drop out of the model, this prior allocates probability to
             each model in the sequence, including the overall null
             hypothesis of homogeneity. Using a counting process
             formulation, the conditional posterior distributions of the
             frailties and proportional hazards regression coefficients
             have simple forms. Posterior computation proceeds via a data
             augmentation Gibbs sampling algorithm, a single run of which
             can be used to obtain model-averaged estimates of the
             population parameters and posterior model probabilities for
             testing hypotheses about the heterogeneity structure. The
             methods are illustrated using data from a lung cancer
             trial.},
   Doi = {10.1111/j.0006-341x.2004.00179.x},
   Key = {fds257927}
}

@article{fds257929,
   Author = {Wilcox, AJ and Baird, DD and Dunson, DB and McConnaughey, DR and Kesner,
             JS and Weinberg, CR},
   Title = {On the frequency of intercourse around ovulation: evidence
             for biological influences.},
   Journal = {Human Reproduction},
   Volume = {19},
   Number = {7},
   Pages = {1539-1543},
   Year = {2004},
   Month = {July},
   ISSN = {0268-1161},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/15190016},
   Abstract = {BACKGROUND: Intercourse in mammals is often coordinated with
             ovulation, for example through fluctuations in libido or by
             the acceleration of ovulation with intercourse. Such
             coordination has not been established in humans. We explored
             this possibility by examining patterns of sexual intercourse
             in relation to ovulation. METHODS: Sixty-eight sexually
             active North Carolina women with either an intrauterine
             device or tubal ligation provided data for up to three
             menstrual cycles. These women collected daily urine
             specimens and kept daily diaries of intercourse and
             menstrual bleeding. Major estrogen and progesterone
             metabolites excreted in urine were used to identify the day
             of ovulation. The fertile days of the cycle were defined as
             the 6 consecutive days ending with ovulation. Women
             contributed a total of 171 ovulatory cycles. Menstrual
             bleeding days were excluded from analysis. RESULTS: The
             frequency of intercourse rose during the follicular phase,
             peaking at ovulation and declining abruptly thereafter. The
             6 consecutive days with most frequent intercourse
             corresponded with the 6 fertile days of the menstrual cycle.
             Intercourse was 24% more frequent during the 6 fertile days
             than during the remaining non-bleeding days (P < 0.001).
             CONCLUSIONS: There apparently are biological factors that
             promote intercourse during a woman's 6 fertile
             days.},
   Doi = {10.1093/humrep/deh305},
   Key = {fds257929}
}

@article{fds257928,
   Author = {Slama, R and Ducot, B and Keiding, N and Bouyer, J},
   Title = {Studying human fertility and environmental
             exposures.},
   Journal = {Environmental Health Perspectives},
   Volume = {112},
   Number = {11},
   Pages = {A604},
   Year = {2004},
   Month = {August},
   ISSN = {0091-6765},
   url = {http://dx.doi.org/10.1289/ehp.112-1247502},
   Doi = {10.1289/ehp.112-1247502},
   Key = {fds257928}
}

@article{fds257930,
   Author = {Dunson, DB and Holloman, C and Calder, C and Gunn,
             LH},
   Title = {Bayesian modeling of multiple lesion onset and growth from
             interval-censored data.},
   Journal = {Biometrics},
   Volume = {60},
   Number = {3},
   Pages = {676-683},
   Year = {2004},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2004.00217.x},
   Abstract = {In studying rates of occurrence and progression of lesions
             (or tumors), it is typically not possible to obtain exact
             onset times for each lesion. Instead, data consist of the
             number of lesions that reach a detectable size between
             screening examinations, along with measures of the
             size/severity of individual lesions at each exam time. This
             interval-censored data structure makes it difficult to
             properly adjust for the onset time distribution in assessing
             covariate effects on rates of lesion progression. This
             article proposes a joint model for the multiple lesion onset
             and progression process, motivated by cross-sectional data
             from a study of uterine leiomyoma tumors. By using a joint
             model, one can potentially obtain more precise inferences on
             rates of onset, while also performing onset time-adjusted
             inferences on lesion severity. Following a Bayesian
             approach, we propose a data augmentation Markov chain Monte
             Carlo algorithm for posterior computation.},
   Doi = {10.1111/j.0006-341x.2004.00217.x},
   Key = {fds257930}
}

@article{fds257931,
   Author = {O'Brien, SM and Dunson, DB},
   Title = {Bayesian multivariate logistic regression.},
   Journal = {Biometrics},
   Volume = {60},
   Number = {3},
   Pages = {739-746},
   Year = {2004},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/15339297},
   Abstract = {Bayesian analyses of multivariate binary or categorical
             outcomes typically rely on probit or mixed effects logistic
             regression models that do not have a marginal logistic
             structure for the individual outcomes. In addition,
             difficulties arise when simple noninformative priors are
             chosen for the covariance parameters. Motivated by these
             problems, we propose a new type of multivariate logistic
             distribution that can be used to construct a likelihood for
             multivariate logistic regression analysis of binary and
             categorical data. The model for individual outcomes has a
             marginal logistic structure, simplifying interpretation. We
             follow a Bayesian approach to estimation and inference,
             developing an efficient data augmentation algorithm for
             posterior computation. The method is illustrated with
             application to a neurotoxicology study.},
   Doi = {10.1111/j.0006-341X.2004.00224.x},
   Key = {fds257931}
}

@article{fds257932,
   Author = {Herring, AH and Dunson, DB and Dole, N},
   Title = {Modeling the effects of a bidirectional latent predictor
             from multivariate questionnaire data.},
   Journal = {Biometrics},
   Volume = {60},
   Number = {4},
   Pages = {926-935},
   Year = {2004},
   Month = {December},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2004.00248.x},
   Abstract = {Researchers often measure stress using questionnaire data on
             the occurrence of potentially stress-inducing life events
             and the strength of reaction to these events, characterized
             as negative or positive and assigned an ordinal ranking. In
             studying the health effects of stress, one needs to obtain
             measures of an individual's negative and positive stress
             levels to be used as predictors. Motivated by data of this
             type, we propose a latent variable model, which is
             characterized by event-specific negative and positive
             reaction scores. If the positive reaction score dominates
             the negative reaction score for an event, then the
             individual's reported response to that event will be
             positive, with an ordinal ranking determined by the value of
             the score. Measures of overall positive and negative stress
             can be obtained by summing the reactivity scores across the
             events that occur for an individual. By incorporating these
             measures as predictors in a regression model and fitting the
             stress and outcome models jointly using Bayesian methods,
             inferences can be conducted without the need to assume known
             weights for the different events. We propose an MCMC
             algorithm for posterior computation and apply the approach
             to study the effects of stress on preterm
             delivery.},
   Doi = {10.1111/j.0006-341x.2004.00248.x},
   Key = {fds257932}
}

@article{fds257934,
   Author = {Dunson, DB and Herring, AH},
   Title = {Bayesian latent variable models for mixed discrete
             outcomes.},
   Journal = {Biostatistics (Oxford, England)},
   Volume = {6},
   Number = {1},
   Pages = {11-25},
   Year = {2005},
   Month = {January},
   url = {http://dx.doi.org/10.1093/biostatistics/kxh025},
   Abstract = {In studies of complex health conditions, mixtures of
             discrete outcomes (event time, count, binary, ordered
             categorical) are commonly collected. For example, studies of
             skin tumorigenesis record latency time prior to the first
             tumor, increases in the number of tumors at each week, and
             the occurrence of internal tumors at the time of death.
             Motivated by this application, we propose a general
             underlying Poisson variable framework for mixed discrete
             outcomes, accommodating dependency through an additive gamma
             frailty model for the Poisson means. The model has
             log-linear, complementary log-log, and proportional hazards
             forms for count, binary and discrete event time outcomes,
             respectively. Simple closed form expressions can be derived
             for the marginal expectations, variances, and correlations.
             Following a Bayesian approach to inference,
             conditionally-conjugate prior distributions are chosen that
             facilitate posterior computation via an MCMC algorithm. The
             methods are illustrated using data from a Tg.AC mouse
             bioassay study.},
   Doi = {10.1093/biostatistics/kxh025},
   Key = {fds257934}
}
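
%% Note: The complementary log-log form mentioned in the abstract above
%% follows from the underlying Poisson construction: if a binary outcome
%% indicates that a latent Poisson count is positive, then P(y = 1) =
%% 1 - exp(-mu). A minimal simulation sketch, assuming a shared gamma
%% frailty; all names and parameter values below are hypothetical.

import numpy as np

rng = np.random.default_rng(0)
n = 100000
frailty = rng.gamma(shape=2.0, scale=0.5, size=n)   # shared gamma frailty per subject
mu_count, mu_binary = 1.5, 0.8                      # assumed Poisson means
counts = rng.poisson(mu_count * frailty)            # count outcome: log-linear in the frailty
binary = rng.poisson(mu_binary * frailty) > 0       # binary outcome via latent Poisson

# The binary margin has a complementary log-log form:
# log(-log P(y = 0 | frailty)) = log(mu_binary) + log(frailty).
print(binary.mean(), 1 - np.exp(-mu_binary * frailty).mean())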

@article{fds257935,
   Author = {Longnecker, MP and Klebanoff, MA and Dunson, DB and Guo, X and Chen, Z and Zhou, H and Brock, JW},
   Title = {Maternal serum level of the DDT metabolite DDE in relation
             to fetal loss in previous pregnancies.},
   Journal = {Environmental Research},
   Volume = {97},
   Number = {2},
   Pages = {127-133},
   Year = {2005},
   Month = {February},
   ISSN = {0013-9351},
   url = {http://dx.doi.org/10.1016/s0013-9351(03)00108-7},
   Abstract = {Use of 1,1,1-trichloro-2,2-bis(p-chlorophenyl)ethane (DDT)
             continues in about 25 countries. This use has been justified
             partly by the belief that it has no adverse consequences on
             human health. Evidence has been increasing, however, for
             adverse reproductive effects of DDT, but additional data are
             needed. Pregnant women who enrolled in the Collaborative
             Perinatal Project (United States, 1959-1965) were asked
             about their previous pregnancy history; blood samples were
             drawn and the serum frozen. In 1997-1999, the sera of 1717
             of these women who had previous pregnancies were analyzed
             for 1,1-dichloro-2,2-bis(p-chlorophenyl)ethylene (DDE), the
             major breakdown product of DDT. The odds of previous fetal
             loss were examined in relation to DDE level in logistic
             regression models. Compared with women whose DDE level was
             <15 microg/L, the adjusted odds ratios of fetal loss
             according to category of DDE were as follows: 15-29
             microg/L, 1.1; 30-44 microg/L, 1.4; 45-59 microg/L, 1.6; and
             60+ microg/L, 1.2. The adjusted odds ratio per 60 microg/L
             increase was 1.4 (95% confidence interval 1.1-1.6). The
             results were consistent with an adverse effect of DDE on
             fetal loss, but were inconclusive owing to the possibility
             that previous pregnancies ending in fetal loss decreased
             serum DDE levels less than did those carried to
             term.},
   Doi = {10.1016/s0013-9351(03)00108-7},
   Key = {fds257935}
}

@article{fds257936,
   Author = {Dunson, DB and Stanford, JB},
   Title = {Bayesian inferences on predictors of conception
             probabilities.},
   Journal = {Biometrics},
   Volume = {61},
   Number = {1},
   Pages = {126-133},
   Year = {2005},
   Month = {March},
   url = {http://dx.doi.org/10.1111/j.0006-341x.2005.031231.x},
   Abstract = {Reproductive scientists and couples attempting pregnancy are
             interested in identifying predictors of the day-specific
             probabilities of conception in relation to the timing of a
             single intercourse act. Because most menstrual cycles have
             multiple days of intercourse, the occurrence of conception
             represents the aggregation across Bernoulli trials for each
             intercourse day. Because of this data structure and
             dependency among the multiple cycles from a woman,
             implementing analyses has proven challenging. This article
             proposes a Bayesian approach based on a generalization of
             the Barrett and Marshall model to incorporate a
             woman-specific frailty and day-specific covariates. The
             model results in a simple closed form expression for the
             marginal probability of conception, and has an auxiliary
             variables formulation that facilitates efficient posterior
             computation. Although motivated by fecundability studies,
             the approach can be used for efficient variable selection
             and model averaging in general applications with categorical
             or discrete event time data.},
   Doi = {10.1111/j.0006-341x.2005.031231.x},
   Key = {fds257936}
}
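
%% Note: For context, the Barrett and Marshall model generalized above
%% aggregates Bernoulli trials across intercourse days k as
%% P(conception) = 1 - prod_k (1 - p_k)^(X_k), where X_k indicates
%% intercourse on day k. A minimal sketch; the day-specific
%% probabilities below are made-up values, not estimates from the paper.

import numpy as np

def cycle_conception_prob(p, x):
    """Barrett-Marshall aggregation: 1 - prod_k (1 - p_k)**x_k."""
    p, x = np.asarray(p), np.asarray(x)
    return 1.0 - np.prod((1.0 - p) ** x)

p = [0.05, 0.10, 0.15, 0.25, 0.20, 0.10]   # hypothetical fertile-day probabilities
x = [0, 1, 0, 1, 0, 0]                     # intercourse on fertile days 2 and 4
print(cycle_conception_prob(p, x))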

@article{fds257938,
   Author = {Dunson, DB and Taylor, JA},
   Title = {Approximate Bayesian inference for quantiles},
   Journal = {Journal of Nonparametric Statistics},
   Volume = {17},
   Number = {3},
   Pages = {385-400},
   Publisher = {Informa UK Limited},
   Year = {2005},
   Month = {April},
   url = {http://dx.doi.org/10.1080/10485250500039049},
   Abstract = {Suppose data consist of a random sample from a distribution
             function F_Y, which is unknown, and that interest focuses on
             inferences on θ, a vector of quantiles of F_Y. When the
             likelihood function is not fully specified, a posterior
             density cannot be calculated and Bayesian inference is
             difficult. This article considers an approach which relies
             on a substitution likelihood characterized by a vector of
             quantiles. Properties of the substitution likelihood are
             investigated, strategies for prior elicitation are
             presented, and a general framework is proposed for quantile
             regression modeling. Posterior computation proceeds via a
             Metropolis algorithm that utilizes a normal approximation to
             the posterior. Results from a simulation study are
             presented, and the methods are illustrated through
             application to data from a genotoxicity experiment. © 2005
             Taylor & Francis Ltd.},
   Doi = {10.1080/10485250500039049},
   Key = {fds257938}
}
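
%% Note: A sketch of the substitution likelihood idea described above:
%% hypothesized quantiles theta at probability levels p are scored by a
%% multinomial likelihood on the counts of observations falling between
%% consecutive quantiles. This illustrates the general device, not the
%% paper's exact implementation.

import numpy as np

def log_substitution_lik(theta, probs, y):
    """Multinomial substitution log-likelihood for candidate quantiles."""
    edges = np.concatenate(([0.0], np.asarray(probs), [1.0]))
    counts = np.bincount(np.searchsorted(np.asarray(theta), y),
                         minlength=len(theta) + 1)  # counts between quantiles
    return float(np.sum(counts * np.log(np.diff(edges))))

y = np.random.default_rng(1).normal(size=200)
# candidate quartiles of a standard normal, at levels 0.25, 0.50, 0.75
print(log_substitution_lik([-0.67, 0.0, 0.67], [0.25, 0.50, 0.75], y))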

@article{fds258074,
   Author = {Dunson, DB and Bigelow, JL and Colombo, B},
   Title = {Reduced fertilization rates in older men when cervical mucus
             is suboptimal.},
   Journal = {Obstetrics and Gynecology},
   Volume = {105},
   Number = {4},
   Pages = {788-793},
   Year = {2005},
   Month = {April},
   ISSN = {0029-7844},
   url = {http://dx.doi.org/10.1097/01.aog.0000154155.20366.ee},
   Abstract = {OBJECTIVE: Cervical mucus is vital in the regulation of
             sperm survival and transport through the reproductive tract.
             The goal of this study is to assess whether the lowered
             fertility for men in their late 30s and early 40s is related
             to the nature of cervical mucus on the day of intercourse.
             METHODS: In a prospective study of 7 European family
             planning centers, 782 couples not using birth control
             recorded daily observations of intercourse and the nature of
             cervical mucus. Using data from 1,459 menstrual cycles, 342
             ending in pregnancy, we estimate day-specific conception
             probabilities in relation to mucus and male and female age.
             RESULTS: On days when cervical mucus was not evident,
             intercourse was 50% less likely to result in a clinical
             pregnancy for men in their late 30s and early 40s, adjusting for
             intercourse timing and female age. As secretions become more
             conducive to sperm transport, the effect of male age
             diminishes steadily from 21% on days with damp secretions,
             to 11% on days with thick mucus, to only 4% on days with
             most fertile-type mucus. CONCLUSION: The effect of male age
             on fecundability can be minimized by timing intercourse on
             days with optimal secretions. LEVEL OF EVIDENCE:
             II-2.},
   Doi = {10.1097/01.aog.0000154155.20366.ee},
   Key = {fds258074}
}

@article{fds257937,
   Author = {Dunson, DB and Herring, AH},
   Title = {Bayesian model selection and averaging in additive and
             proportional hazards models.},
   Journal = {Lifetime Data Analysis},
   Volume = {11},
   Number = {2},
   Pages = {213-232},
   Year = {2005},
   Month = {June},
   url = {http://dx.doi.org/10.1007/s10985-004-0384-x},
   Abstract = {Although Cox proportional hazards regression is the default
             analysis for time to event data, there is typically
             uncertainty about whether the effects of a predictor are
             more appropriately characterized by a multiplicative or
             additive model. To accommodate this uncertainty, we place a
             model selection prior on the coefficients in an
             additive-multiplicative hazards model. This prior assigns
             positive probability, not only to the model that has both
             additive and multiplicative effects for each predictor, but
             also to sub-models corresponding to no association, to only
             additive effects, and to only proportional effects. The
             additive component of the model is constrained to ensure
             non-negative hazards, a condition often violated by current
             methods. After augmenting the data with Poisson latent
             variables, the prior is conditionally conjugate, and
             posterior computation can proceed via an efficient Gibbs
             sampling algorithm. Simulation study results are presented,
             and the methodology is illustrated using data from the
             Framingham heart study.},
   Doi = {10.1007/s10985-004-0384-x},
   Key = {fds257937}
}
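
%% Note: A generic sketch of the model-selection prior described above:
%% each predictor's additive and multiplicative coefficients receive a
%% point mass at zero (no effect) mixed with a continuous density, with
%% the additive part restricted to keep hazards non-negative. The
%% mixture weights and distributions below are hypothetical.

import numpy as np

rng = np.random.default_rng(2)

def draw_coefficient(p_zero=0.5, scale=1.0, nonneg=False):
    """Point mass at 0 with probability p_zero, else a normal draw,
    folded to [0, inf) when a non-negativity constraint is imposed."""
    if rng.random() < p_zero:
        return 0.0
    beta = rng.normal(0.0, scale)
    return abs(beta) if nonneg else beta

alpha = draw_coefficient(nonneg=True)   # additive hazard component, constrained >= 0
beta = draw_coefficient()               # multiplicative (log hazard ratio) component
print(alpha, beta)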

@article{fds258075,
   Author = {Dunson, DB},
   Title = {Bayesian semiparametric isotonic regression for count
             data},
   Journal = {Journal of the American Statistical Association},
   Volume = {100},
   Number = {470},
   Pages = {618-627},
   Publisher = {Informa UK Limited},
   Year = {2005},
   Month = {June},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214504000001457},
   Abstract = {This article proposes a semiparametric Bayesian approach for
             inference on an unknown isotonic regression function, f(x),
             characterizing the relationship between a continuous
             predictor, X, and a count response variable, Y, adjusting
             for covariates, Z. A Dirichlet process mixture of Poisson
             distributions is used to avoid parametric assumptions on the
             conditional distribution of Y given X and Z. Then, to also
             avoid parametric assumptions on f(x), a novel prior
             formulation is proposed that enforces the nondecreasing
             constraint and assigns positive prior probability to the
             null hypothesis of no association. Through the use of
             carefully tailored hyperprior distributions, we allow for
             borrowing of information across different regions of X in
             estimating f(x) and in assessing hypotheses about local
             increases in the function. Due to conjugacy properties,
             posterior computation is straightforward using a Markov
             chain Monte Carlo algorithm. The methods are illustrated
             using data from an epidemiologic study of sleep problems and
             obesity.},
   Doi = {10.1198/016214504000001457},
   Key = {fds258075}
}
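
%% Note: One standard way to encode the nondecreasing constraint
%% referred to above is to write f at ordered knots as a cumulative sum
%% of non-negative increments, with a point mass at zero allowing
%% locally flat (no-association) regions. A hedged sketch; the knot
%% count and prior values are made up.

import numpy as np

rng = np.random.default_rng(3)
k = 10                                    # number of knots (hypothetical)
flat = rng.random(k) < 0.3                # point mass at zero -> flat region
increments = np.where(flat, 0.0, rng.gamma(1.0, 0.2, size=k))
f = np.cumsum(increments)                 # nondecreasing by construction
print(np.all(np.diff(f) >= 0))            # True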

@article{fds257939,
   Author = {Gunn, LH and Dunson, DB},
   Title = {A transformation approach for incorporating monotone or
             unimodal constraints.},
   Journal = {Biostatistics (Oxford, England)},
   Volume = {6},
   Number = {3},
   Pages = {434-449},
   Year = {2005},
   Month = {July},
   ISSN = {1465-4644},
   url = {http://dx.doi.org/10.1093/biostatistics/kxi020},
   Abstract = {Samples of curves are collected in many applications,
             including studies of reproductive hormone levels in the
             menstrual cycle. Many approaches have been proposed for
             correlated functional data of this type, including smoothing
             spline methods and other flexible parametric modeling
             strategies. In many cases, the underlying biological
             processes involved restrict the curve to follow a particular
             shape. For example, progesterone levels in healthy women
             increase during the menstrual cycle to a peak achieved at a
             random location, with decreases thereafter. Reproductive
             epidemiologists are interested in studying the distribution
             of the peak and the trajectory for women in different
             groups. Motivated by this application, we propose a simple
             approach for restricting each woman's mean trajectory to
             follow an umbrella shape. An unconstrained hierarchical
             Bayesian model is used to characterize the data, and draws
             from the posterior distribution obtained using a Gibbs
             sampler are then mapped to the constrained space. Inferences
             are based on the resulting quasi-posterior distribution for
             the peak and individual woman trajectories. The methods are
             applied to a study comparing progesterone trajectories for
             conception and nonconception cycles.},
   Doi = {10.1093/biostatistics/kxi020},
   Key = {fds257939}
}
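
%% Note: The mapping from unconstrained draws to the constrained space
%% can be illustrated with a simple projection: fix the peak at the
%% curve's maximum, then force the curve to be nondecreasing before the
%% peak and nonincreasing after it. An illustrative transformation
%% only, not necessarily the one used in the paper.

import numpy as np

def to_umbrella(curve):
    """Map an unconstrained curve draw to an umbrella (unimodal) shape."""
    out = np.asarray(curve, dtype=float).copy()
    peak = int(np.argmax(out))
    out[:peak + 1] = np.maximum.accumulate(out[:peak + 1])      # rise to the peak
    out[peak:] = np.maximum.accumulate(out[peak:][::-1])[::-1]  # fall after the peak
    return out

print(to_umbrella([0.1, 0.3, 0.2, 0.8, 0.5, 0.6, 0.2]))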

@article{fds257940,
   Author = {Law, DCG and Klebanoff, MA and Brock, JW and Dunson, DB and Longnecker,
             MP},
   Title = {Maternal serum levels of polychlorinated biphenyls and
             1,1-dichloro-2,2-bis(p-chlorophenyl)ethylene (DDE) and time
             to pregnancy.},
   Journal = {American Journal of Epidemiology},
   Volume = {162},
   Number = {6},
   Pages = {523-532},
   Year = {2005},
   Month = {September},
   url = {http://dx.doi.org/10.1093/aje/kwi240},
   Abstract = {Polychlorinated biphenyls (PCBs), once used widely in
             transformers and other applications, and
             1,1-dichloro-2,2-bis(p-chlorophenyl)ethylene (DDE), the main
             metabolite of the pesticide 1,1,1-trichloro-2,2-bis(p-chlorophenyl)ethane
             (DDT), are hormonally active agents. Changes in menstrual
             cycle functioning associated with PCBs and DDE, and
             increased odds of spontaneous abortion associated with DDE,
             suggest that these compounds could affect fertility. The
             authors investigated the association between PCB and DDE
             exposure and time to pregnancy by using serum levels
             measured in 390 pregnant women in the Collaborative
             Perinatal Project enrolled at 12 study centers in the United
             States from 1959 to 1965. They estimated adjusted
             fecundability odds ratios by using Cox proportional hazards
             modeling for discrete time data. Compared with time to
             pregnancy for women in the lowest exposure category (PCBs <
             1.24 microg/liter, DDE < 14 microg/liter), time to pregnancy
             increased for women in the highest exposure category in
             terms of both PCBs (fecundability odds ratio for PCBs ≥
             5.00 microg/liter = 0.65, 95% confidence interval: 0.36,
             1.18) and DDE (fecundability odds ratio for DDE ≥ 60
             microg/liter = 0.65, 95% confidence interval: 0.32, 1.31).
             Overall, time to pregnancy increased with increasing serum
             PCB levels but was less suggestive of an association with
             DDE. Both trends were imprecise and attenuated when
             expressed on a lipid basis. Overall, evidence of an
             association between PCB or DDE exposure and time to
             pregnancy was weak and inconclusive.},
   Doi = {10.1093/aje/kwi240},
   Key = {fds257940}
}

@article{fds257941,
   Author = {Peddada, SD and Dunson, DB and Tan, X},
   Title = {Estimation of order-restricted means from correlated
             data},
   Journal = {Biometrika},
   Volume = {92},
   Number = {3},
   Pages = {703-715},
   Publisher = {Oxford University Press (OUP)},
   Year = {2005},
   Month = {September},
   url = {http://dx.doi.org/10.1093/biomet/92.3.703},
   Abstract = {In many applications, researchers are interested in
             estimating the mean of a multivariate normal random vector
             whose components are subject to order restrictions. Various
             authors have demonstrated that the likelihood-based
             methodology may perform poorly under certain conditions for
             such problems. The problem is much harder when the
             underlying covariance matrix is nondiagonal. In this paper a
             simple iterative algorithm is introduced that can be used
             for estimating the mean of a multivariate normal population
             when the components are subject to any order restriction.
             The proposed methodology is illustrated through an
             application to human reproductive hormone data. © 2005
             Biometrika Trust.},
   Doi = {10.1093/biomet/92.3.703},
   Key = {fds257941}
}

@article{fds258016,
   Author = {Gueorguieva, RV},
   Title = {Comments about Joint Modeling of Cluster Size and Binary and
             Continuous Subunit-Specific Outcomes.},
   Journal = {Biometrics},
   Volume = {61},
   Number = {3},
   Pages = {862-866},
   Year = {2005},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-020x.2005.00409_1.x},
   Abstract = {In longitudinal studies and in clustered situations, binary
             and continuous response variables are often observed and
             need to be modeled together. In a recent publication Dunson,
             Chen, and Harry (2003, Biometrics 59, 521-530) (DCH) propose
             a Bayesian approach for joint modeling of cluster size and
             binary and continuous subunit-specific outcomes and
             illustrate this approach with a developmental toxicity data
             example. In this note we demonstrate how standard software
             (PROC NLMIXED in SAS) can be used to obtain maximum
             likelihood estimates in an alternative parameterization of
             the model with a single cluster-level factor considered by
             DCH for that example. We also suggest that a more general
             model with additional cluster-level random effects provides
             a better fit to the data set. An apparent discrepancy
             between the estimates obtained by DCH and the estimates
             obtained earlier by Catalano and Ryan (1992, Journal of the
             American Statistical Association 87, 651-658) is also
             resolved. The issue of bias in inferences concerning the
             dose effect when cluster size is ignored is discussed. The
             maximum-likelihood approach considered herein is applicable
             to general situations with multiple clustered or
             longitudinally measured outcomes of different type and does
             not require prior specification and extensive
             programming.},
   Doi = {10.1111/j.1541-020x.2005.00409_1.x},
   Key = {fds258016}
}

@article{fds257933,
   Author = {Dunson, DB},
   Title = {Bayesian Biostatistics},
   Journal = {Handbook of Statistics},
   Volume = {25},
   Pages = {743-761},
   Publisher = {Elsevier},
   Year = {2005},
   Month = {December},
   ISSN = {0169-7161},
   url = {http://dx.doi.org/10.1016/S0169-7161(05)25025-3},
   Abstract = {With the rapid increase in biomedical technology and the
             accompanying generation of complex and high-dimensional data
             sets, Bayesian statistical methods have become much more
             widely used. One reason is that the Bayesian probability
             modeling machinery provides a natural framework for
             integration of data and information from multiple sources,
             while accounting for uncertainty in model specifications.
             This chapter briefly reviews some of the recent areas in
             which Bayesian biostatistical research has had the greatest
             impact. Particular areas of focus include correlated and
             longitudinal data analysis, event time data, nonlinear
             modeling, model averaging, and bioinformatics. The reader is
             referred elsewhere for recent Bayesian developments in other
             important areas, such as clinical trials and analysis of
             spatially correlated data. Certainly the many practical and
             conceptual advantages of the Bayesian paradigm will lead to
             an increasing impact in future biomedical research,
             particularly in areas such as genomics. © 2005 Elsevier
             B.V. All rights reserved.},
   Doi = {10.1016/S0169-7161(05)25025-3},
   Key = {fds257933}
}

@article{fds257942,
   Author = {Hans, C and Dunson, DB},
   Title = {Bayesian inferences on umbrella orderings.},
   Journal = {Biometrics},
   Volume = {61},
   Number = {4},
   Pages = {1018-1026},
   Year = {2005},
   Month = {December},
   ISSN = {0006-341X},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/16401275},
   Abstract = {In regression applications with categorical predictors,
             interest often focuses on comparing the null hypothesis of
             homogeneity to an ordered alternative. This article proposes
             a Bayesian approach for addressing this problem in the
             setting of normal linear and probit regression models. The
             regression coefficients are assigned a conditionally
             conjugate prior density consisting of mixtures of point
             masses at 0 and truncated normal densities, with a (possibly
             unknown) changepoint parameter included to accommodate
             umbrella ordering. Two strategies of prior elicitation are
             considered: (1) a Bayesian Bonferroni approach in which the
             probability of the global null hypothesis is specified and
             local hypotheses are considered independent; and (2) an
             approach which treats these probabilities as random. A
             single Gibbs sampling chain can be used to obtain posterior
             probabilities for the different hypotheses and to estimate
             regression coefficients and predictive quantities either by
             model averaging or under the preferred hypothesis. The
             methods are applied to data from a carcinogenesis
             study.},
   Doi = {10.1111/j.1541-0420.2005.00373.x},
   Key = {fds257942}
}

@article{fds257943,
   Author = {Baird, DD and Kesner, JS and Dunson, DB},
   Title = {Luteinizing hormone in premenopausal women may stimulate
             uterine leiomyomata development.},
   Journal = {Journal of the Society for Gynecologic Investigation},
   Volume = {13},
   Number = {2},
   Pages = {130-135},
   Year = {2006},
   Month = {February},
   ISSN = {1071-5576},
   url = {http://dx.doi.org/10.1016/j.jsgi.2005.12.001},
   Abstract = {OBJECTIVE: Human chorionic gonadotropin (hCG) has
             proliferative effects on uterine smooth muscle and leiomyoma
             tissue in vitro. We hypothesized that luteinizing hormone
             (LH) would have the same effect by activating the LH/hCG
             receptor, and it would follow that premenopausal women with
             higher basal LH levels would be more likely to have
             leiomyomata. METHODS: Randomly selected women, aged 35 to 49
             years, from a prepaid health plan were screened for
             leiomyomata with pelvic ultrasound. Urine samples collected
             during the first or last 5 days of the menstrual cycle were
             analyzed for LH by immunofluorometric assay, and
             concentrations were corrected for creatinine (n = 523).
             Logistic regression and Bayes analyses were used to evaluate
             the association of LH with presence and size of leiomyomata,
             adjusting for age, and other risk factors. RESULTS: Women
             with higher LH were more likely to have leiomyomata
             (adjusted odds ratios for second and third tertiles were 1.7
             and 2.0 compared with lower tertile; 95% confidence
             intervals, 1.0 to 2.7 and 1.2 to 3.4, respectively). The
             association was stronger for large leiomyomata. Bayes
             analyses designed to estimate LH effects on tumor onset
             separately from tumor growth showed significantly
             accelerated tumor onset but little evidence of effects on
             tumor growth. Age, an independent risk factor for
             leiomyomata, was not affected by inclusion of LH in the
             logistic models. CONCLUSIONS: As hypothesized, women with
             higher LH were more likely to have leiomyomata, but this did
             not explain the age-related increase in leiomyomata during
             perimenopausal ages. Determining whether LH is causal or a
             marker for susceptibility will require further
             research.},
   Doi = {10.1016/j.jsgi.2005.12.001},
   Key = {fds257943}
}

@article{fds258017,
   Author = {Scarpa, B and Dunson, DB and Colombo, B},
   Title = {Cervical mucus secretions on the day of intercourse: an
             accurate marker of highly fertile days.},
   Journal = {European Journal of Obstetrics, Gynecology, and Reproductive
             Biology},
   Volume = {125},
   Number = {1},
   Pages = {72-78},
   Year = {2006},
   Month = {March},
   url = {http://dx.doi.org/10.1016/j.ejogrb.2005.07.024},
   Abstract = {OBJECTIVE: To provide estimates of the probabilities of
             conception according to vulvar mucus observations classified
             by the woman on the day of intercourse. STUDY DESIGN:
             Prospective cohort study of 193 outwardly healthy Italian
             women using the Billings Ovulation Method. Outcome measures
             include 161 conception cycles and 2594 non-conception cycles
             with daily records of the type of mucus and the occurrences
             of sexual intercourse. RESULTS: The probability of
             conception ranged from 0.003 for days with no noticeable
             secretions to 0.29 for days with most fertile-type mucus
             detected by the woman. The probability of most fertile type
             mucus by day of the menstrual cycle increased from values
             <20% outside of days 10-17 to a peak of 59% on day 13.
             CONCLUSION: Regardless of the timing of intercourse in the
             menstrual cycle, the probability of conception is
             essentially 0 on days with no secretions. This probability
             increases dramatically to near 30% on days with most
             fertile-type mucus, an association that accurately predicts
             both the timing of the fertile interval and the day-specific
             conception probabilities across the menstrual
             cycle.},
   Doi = {10.1016/j.ejogrb.2005.07.024},
   Key = {fds258017}
}

@article{fds258018,
   Author = {Dunson, DB},
   Title = {Special issue of statistical methods in medical research on
             reproductive studies},
   Journal = {Statistical Methods in Medical Research},
   Volume = {15},
   Number = {2},
   Pages = {91-92},
   Publisher = {SAGE Publications},
   Year = {2006},
   Month = {April},
   ISSN = {0962-2802},
   url = {http://dx.doi.org/10.1191/0962280206sm432ed},
   Doi = {10.1191/0962280206sm432ed},
   Key = {fds258018}
}

@article{fds258019,
   Author = {Chen, Z and Dunson, DB},
   Title = {The authors replied as follows [2]},
   Journal = {Biometrics},
   Volume = {62},
   Number = {2},
   Pages = {623-624},
   Publisher = {WILEY},
   Year = {2006},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2006.00586_2.x},
   Doi = {10.1111/j.1541-0420.2006.00586_2.x},
   Key = {fds258019}
}

@article{fds258070,
   Author = {Cai, B and Dunson, DB},
   Title = {Bayesian covariance selection in generalized linear mixed
             models.},
   Journal = {Biometrics},
   Volume = {62},
   Number = {2},
   Pages = {446-457},
   Year = {2006},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://ftp.stat.duke.edu/WorkingPapers/05-01.html},
   Abstract = {The generalized linear mixed model (GLMM), which extends the
             generalized linear model (GLM) to incorporate random effects
             characterizing heterogeneity among subjects, is widely used
             in analyzing correlated and longitudinal data. Although
             there is often interest in identifying the subset of
             predictors that have random effects, random effects
             selection can be challenging, particularly when outcome
             distributions are nonnormal. This article proposes a fully
             Bayesian approach to the problem of simultaneous selection
             of fixed and random effects in GLMMs. Integrating out the
             random effects induces a covariance structure on the
             multivariate outcome data, and an important problem that we
             also consider is that of covariance selection. Our approach
             relies on variable selection-type mixture priors for the
             components in a special Cholesky decomposition of the random
             effects covariance. A stochastic search MCMC algorithm is
             developed, which relies on Gibbs sampling, with Taylor
             series expansions used to approximate intractable integrals.
             Simulated data examples are presented for different
             exponential family distributions, and the approach is
             applied to discrete survival data from a time-to-pregnancy
             study.},
   Doi = {10.1111/j.1541-0420.2005.00499.x},
   Key = {fds258070}
}

@article{fds258020,
   Author = {O'Brien, SM and Kupper, LL and Dunson, DB},
   Title = {Performance of tests of association in misspecified
             generalized linear models},
   Journal = {Journal of Statistical Planning and Inference},
   Volume = {136},
   Number = {9},
   Pages = {3090-3100},
   Publisher = {Elsevier BV},
   Year = {2006},
   Month = {September},
   ISSN = {0378-3758},
   url = {http://dx.doi.org/10.1016/j.jspi.2004.12.004},
   Abstract = {We examine the effects of modelling errors, such as
             underfitting and overfitting, on the asymptotic power of
             tests of association between an explanatory variable x and
             an outcome in the setting of generalized linear models. The
             regression function for x is approximated by a polynomial or
             another simple function, and a chi-square statistic is used
             to test whether the coefficients of the approximation are
             simultaneously equal to zero. Adding terms to the
             approximation increases asymptotic power if and only if the
             fit of the model increases by a certain quantifiable amount.
             Although a high degree of freedom approximation offers
             robustness to the shape of the unknown regression function,
             a low degree of freedom approximation can yield much higher
             asymptotic power even when the approximation is very poor.
             In practice, it is useful to compute the power of competing
             test statistics across the range of alternatives that are
             plausible a priori. This approach is illustrated through an
             application in epidemiology. © 2006 Elsevier B.V. All
             rights reserved.},
   Doi = {10.1016/j.jspi.2004.12.004},
   Key = {fds258020}
}
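
%% Note: The power computation recommended above can be carried out
%% with a noncentral chi-square distribution: with df coefficients
%% tested and noncentrality ncp determined by the alternative, power =
%% P(chi2_df(ncp) > central critical value). A minimal sketch using
%% scipy; the df and ncp values are made up.

from scipy.stats import chi2, ncx2

def asymptotic_power(df, ncp, alpha=0.05):
    """P(noncentral chi-square exceeds the central critical value)."""
    critical = chi2.ppf(1.0 - alpha, df)
    return ncx2.sf(critical, df, ncp)

# At a fixed noncentrality, adding degrees of freedom can lower power.
print(asymptotic_power(df=1, ncp=8.0))
print(asymptotic_power(df=4, ncp=8.0))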

@article{fds258073,
   Author = {Dunson, DB},
   Title = {Bayesian dynamic modeling of latent trait
             distributions.},
   Journal = {Biostatistics (Oxford, England)},
   Volume = {7},
   Number = {4},
   Pages = {551-568},
   Year = {2006},
   Month = {October},
   ISSN = {1465-4644},
   url = {http://dx.doi.org/10.1093/biostatistics/kxj025},
   Abstract = {Studies of latent traits often collect data for multiple
             items measuring different aspects of the trait. For such
             data, it is common to consider models in which the different
             items are manifestations of a normal latent variable, which
             depends on covariates through a linear regression model.
             This article proposes a flexible Bayesian alternative in
             which the unknown latent variable density can change
             dynamically in location and shape across levels of a
             predictor. Scale mixtures of underlying normals are used in
             order to model flexibly the measurement errors and allow
             mixed categorical and continuous scales. A dynamic mixture
             of Dirichlet processes is used to characterize the latent
             response distributions. Posterior computation proceeds via a
             Markov chain Monte Carlo algorithm, with predictive
             densities used as a basis for inferences and evaluation of
             model fit. The methods are illustrated using data from a
             study of DNA damage in response to oxidative
             stress.},
   Doi = {10.1093/biostatistics/kxj025},
   Key = {fds258073}
}

@article{fds258021,
   Author = {Stanford, JB and Dunson, DB},
   Title = {Foreword. Expanding Methodologies for Capturing Day-Specific
             Probabilities of Conception.},
   Journal = {Paediatric and Perinatal Epidemiology},
   Volume = {20 Suppl 1},
   Pages = {1-2},
   Year = {2006},
   Month = {November},
   ISSN = {0269-5022},
   url = {http://dx.doi.org/10.1111/j.1365-3016.2006.00764.x},
   Doi = {10.1111/j.1365-3016.2006.00764.x},
   Key = {fds258021}
}

@article{fds258023,
   Author = {Scarpa, B and Dunson, DB},
   Title = {Bayesian selection of predictors of conception probabilities
             across the menstrual cycle.},
   Journal = {Paediatric and Perinatal Epidemiology},
   Volume = {20 Suppl 1},
   Number = {SUPPL. 1},
   Pages = {30-37},
   Year = {2006},
   Month = {November},
   ISSN = {0269-5022},
   url = {http://dx.doi.org/10.1111/j.1365-3016.2006.00768.x},
   Abstract = {There is increasing interest in identifying predictors of
             human fertility, including environmental exposures,
             behavioural factors, and biomarkers, such as mucus or
             reproductive hormones. Epidemiological studies typically
             measure fecundability, the per menstrual cycle probability
             of conception, using time to pregnancy data. A critical
             predictor, which is often ignored in the design or analysis,
             is the timing of non-contracepting intercourse in the
             menstrual cycle. In order to limit confounding by
             behavioural differences between exposure groups, it may be
             preferable to base inferences on day-specific conception
             probabilities in relation to intercourse timing. This
             article proposes Bayesian methods for selection of
             predictors of day-specific conception probabilities. A
             particular focus is the case in which data on ovulation
             timing are not available. We focus on the selection of
             fertile days in the cycle during which conception
             probabilities are non-negligible and predictors may play a
             role. Data from recent European and Italian prospective
             studies of daily fecundability are presented, and the
             proposed approach is used to estimate cervical mucus effects
             within a mid-cycle potentially fertile window using data
             from the Italian study.},
   Doi = {10.1111/j.1365-3016.2006.00768.x},
   Key = {fds258023}
}

@article{fds258069,
   Author = {Pennell, ML and Dunson, DB},
   Title = {Bayesian semiparametric dynamic frailty models for multiple
             event time data.},
   Journal = {Biometrics},
   Volume = {62},
   Number = {4},
   Pages = {1044-1052},
   Year = {2006},
   Month = {December},
   ISSN = {0006-341X},
   url = {http://ftp.stat.duke.edu/WorkingPapers/04-27.html},
   Abstract = {Many biomedical studies collect data on times of occurrence
             for a health event that can occur repeatedly, such as
             infection, hospitalization, recurrence of disease, or tumor
             onset. To analyze such data, it is necessary to account for
             within-subject dependency in the multiple event times.
             Motivated by data from studies of palpable tumors, this
             article proposes a dynamic frailty model and Bayesian
             semiparametric approach to inference. The widely used shared
             frailty proportional hazards model is generalized to allow
             subject-specific frailties to change dynamically with age
             while also accommodating nonproportional hazards. Parametric
             assumptions on the frailty distribution are avoided by using
             Dirichlet process priors for a shared frailty and for
             multiplicative innovations on this frailty. By centering the
             semiparametric model on a conditionally conjugate dynamic
             gamma model, we facilitate posterior computation and
             lack-of-fit assessments of the parametric model. Our
             proposed method is demonstrated using data from a cancer
             chemoprevention study.},
   Doi = {10.1111/j.1541-0420.2006.00571.x},
   Key = {fds258069}
}

@article{fds257944,
   Author = {Baird, DD and Dunson, DB and Hill, MC and Cousins, D and Schectman,
             JM},
   Title = {Association of physical activity with development of uterine
             leiomyoma.},
   Journal = {American Journal of Epidemiology},
   Volume = {165},
   Number = {2},
   Pages = {157-163},
   Year = {2007},
   Month = {January},
   ISSN = {0002-9262},
   url = {http://dx.doi.org/10.1093/aje/kwj363},
   Abstract = {The relation between physical activity and uterine
             leiomyomata (fibroids) has received little study, but
             exercise is protective for breast cancer, another hormonally
             mediated tumor. Participants in this study were randomly
             selected members of a health plan based in Washington, DC,
             aged 35-49 years (734 African Americans, 455 Whites)
             enrolled between 1996 and 1999. Fibroid status was based on
             ultrasound screening. Physical activity was based on
             detailed interview questions. Logistic regression with
             adjustment for body mass index and other risk factors showed
             that women in the highest category of physical activity were
             significantly less likely to have fibroids (odds ratio =
             0.6, 95% confidence interval = 0.4, 0.9 for the highest vs.
             the lowest category (equivalent to approximately ≥7
             hours/week vs. <2 hours/week)). There was a dose-response
             pattern; a significant trend was seen for both
             African-American and White women. A multistate Bayesian
             analysis indicated that exercise was associated with tumor
             onset more strongly than with tumor growth. When data for
             women who reported major fibroid-related symptoms were
             excluded, results remained essentially unchanged, suggesting
             that the observed association could not be attributed to
             reverse causation (fibroids preventing exercise). The
             authors concluded that regular exercise might help women
             prevent fibroids.},
   Doi = {10.1093/aje/kwj363},
   Key = {fds257944}
}

@article{fds257945,
   Author = {MacLehose, RF and Dunson, DB and Herring, AH and Hoppin,
             JA},
   Title = {Bayesian methods for highly correlated exposure
             data.},
   Journal = {Epidemiology (Cambridge, Mass.)},
   Volume = {18},
   Number = {2},
   Pages = {199-207},
   Year = {2007},
   Month = {March},
   ISSN = {1044-3983},
   url = {http://dx.doi.org/10.1097/01.ede.0000256320.30737.c0},
   Abstract = {Studies that include individuals with multiple highly
             correlated exposures are common in epidemiology. Because
             standard maximum likelihood techniques often fail to
             converge in such instances, hierarchical regression methods
             have seen increasing use. Bayesian hierarchical regression
             places prior distributions on exposure-specific regression
             coefficients to stabilize estimation and incorporate prior
             knowledge, if available. A common parametric approach in
             epidemiology is to treat the prior mean and variance as
             fixed constants. An alternative parametric approach is to
             place distributions on the prior mean and variance to allow
             the data to help inform their values. As a more flexible
             semiparametric option, one can place an unknown distribution
             on the coefficients that simultaneously clusters exposures
             into groups using a Dirichlet process prior. We also present
             a semiparametric model with a variable-selection prior to
             allow clustering of coefficients at 0. We compare these 4
             hierarchical regression methods and demonstrate their
             application in an example estimating the association of
             herbicides with retinal degeneration among wives of
             pesticide applicators.},
   Doi = {10.1097/01.ede.0000256320.30737.c0},
   Key = {fds257945}
}

@article{fds257946,
   Author = {Scarpa, B and Dunson, DB},
   Title = {Bayesian methods for searching for optimal rules for timing
             intercourse to achieve pregnancy.},
   Journal = {Statistics in Medicine},
   Volume = {26},
   Number = {9},
   Pages = {1920-1936},
   Year = {2007},
   Month = {April},
   ISSN = {0277-6715},
   url = {http://dx.doi.org/10.1002/sim.2846},
   Abstract = {With societal trends towards increasing age at starting a
             pregnancy attempt, many women are concerned about achieving
             conception before the onset of infertility, which precedes
             menopause. Couples failing to conceive a pregnancy within 12
             months are classified as clinically infertile, and may be
             recommended for assisted reproductive therapy (ART). Because
             many ART procedures are expensive and may convey an
             increased risk of adverse outcomes for the offspring, it is
             advantageous to decrease time to pregnancy by natural
             methods. One possibility is to intentionally time
             intercourse during the days of the menstrual cycle having
             the highest conception probabilities. This article proposes
             a Bayesian decision theoretic approach for searching for
             optimal rules for timing intercourse based on cycle day,
             secretions and other information. Good rules result in high
             conception probabilities while requiring minimal targeted
             intercourse. A biologically based statistical model is used
             to relate cycle day and biomarkers to the conception
             probability. A stochastic search procedure is then developed
             to search for rules with high expected utility, and the
             methods are applied to data from a recent Italian
             study.},
   Doi = {10.1002/sim.2846},
   Key = {fds257946}
}

@article{fds258068,
   Author = {Dunson, DB and Pillai, N and Park, JH},
   Title = {Bayesian density regression},
   Journal = {Journal of the Royal Statistical Society: Series B
             (Statistical Methodology)},
   Volume = {69},
   Number = {2},
   Pages = {163-183},
   Publisher = {WILEY},
   Year = {2007},
   Month = {April},
   ISSN = {1369-7412},
   url = {http://dx.doi.org/10.1111/j.1467-9868.2007.00582.x},
   Abstract = {The paper considers Bayesian methods for density regression,
             allowing a random probability distribution to change
             flexibly with multiple predictors. The conditional response
             distribution is expressed as a non-parametric mixture of
             regression models, with the mixture distribution changing
             with predictors. A class of weighted mixture of Dirichlet
             process priors is proposed for the uncountable collection of
             mixture distributions. It is shown that this specification
             results in a generalized Pólya urn scheme, which
             incorporates weights that are dependent on the distance
             between subjects' predictor values. To allow local
             dependence in the mixture distributions, we propose a
             kernel-based weighting scheme. A Gibbs sampling algorithm is
             developed for posterior computation. The methods are
             illustrated by using simulated data examples and an
             epidemiologic application. © Royal Statistical
             Society.},
   Doi = {10.1111/j.1467-9868.2007.00582.x},
   Key = {fds258068}
}
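
%% Note: A sketch of the distance-dependent weighting described above:
%% in a kernel-weighted Pólya urn, a new subject joins a previous
%% subject's mixture component with probability proportional to a
%% kernel in the predictor distance, or starts a new component with
%% mass proportional to the precision parameter. The kernel choice and
%% parameter values here are hypothetical.

import numpy as np

rng = np.random.default_rng(4)

def weighted_urn_assignment(x_new, x_prev, labels, alpha=1.0, h=1.0):
    """Assign a new subject to an old component or a new one."""
    w = np.exp(-((x_new - np.asarray(x_prev)) ** 2) / (2.0 * h ** 2))  # Gaussian kernel
    probs = np.concatenate((w, [alpha]))
    probs /= probs.sum()
    pick = rng.choice(len(probs), p=probs)
    return labels[pick] if pick < len(labels) else max(labels) + 1

print(weighted_urn_assignment(0.2, [0.1, 0.3, 2.5], labels=[0, 0, 1]))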

@article{fds258071,
   Author = {Dunson, DB},
   Title = {Empirical bayes density regression},
   Journal = {Statistica Sinica},
   Volume = {17},
   Number = {2},
   Pages = {481-504},
   Year = {2007},
   Month = {April},
   ISSN = {1017-0405},
   Abstract = {In Bayesian hierarchical modeling, it is often appealing to
             allow the conditional density of an (observable or
             unobservable) random variable Y to change flexibly with
             categorical and continuous predictors X. A mixture of
             regression models is proposed, with the mixture distribution
             varying with X. Treating the smoothing parameters and number
             of mixture components as unknown, the MLE does not exist,
             motivating an empirical Bayes approach. The proposed method
             shrinks the spatially-adaptive mixture distributions to a
             common baseline, while penalizing rapid changes and large
             numbers of components. The discrete form of the mixture
             distribution facilitates flexible classification of
             subjects. A Gibbs sampling algorithm is developed, which
             embeds a Monte Carlo EM-type stage to estimate smoothing and
             hyper-parameters. The method is applied to simulated
             examples and data from an epidemiologic study.},
   Key = {fds258071}
}

@article{fds257947,
   Author = {Stanford, JB and Dunson, DB},
   Title = {Effects of sexual intercourse patterns in time to pregnancy
             studies.},
   Journal = {American Journal of Epidemiology},
   Volume = {165},
   Number = {9},
   Pages = {1088-1095},
   Year = {2007},
   Month = {May},
   ISSN = {0002-9262},
   url = {http://dx.doi.org/10.1093/aje/kwk111},
   Abstract = {Time to pregnancy, typically defined as the number of
             menstrual cycles required to achieve a clinical pregnancy,
             is widely used as a measure of couple fecundity in
             epidemiologic studies. Time to pregnancy studies seldom
             utilize detailed data on the timing and frequency of sexual
             intercourse and the timing of ovulation. However, the
             simulated models in this paper illustrate that intercourse
             behavior can have a large impact on time to pregnancy and,
             likewise, on fecundability ratios, especially under
             conditions of low intercourse frequency or low fecundity.
             Because intercourse patterns in the menstrual cycles may
             vary substantially among groups, it is important to consider
             the effects of sexual behavior. Where relevant and feasible,
             an assessment should be made of the timing and frequency of
             intercourse relative to ovulation. Day-specific
             probabilities of pregnancy can be used to account for the
             effects of intercourse patterns. Depending on the research
             hypothesis, intercourse patterns may be considered as a
             potential confounder, mediator, or outcome.},
   Doi = {10.1093/aje/kwk111},
   Key = {fds257947}
}

@article{fds257979,
   Author = {Xue, Y and Dunson, D and Carin, L},
   Title = {The matrix stick-breaking process for flexible multi-task
             learning},
   Journal = {Acm International Conference Proceeding Series},
   Volume = {227},
   Pages = {1063-1070},
   Publisher = {ACM Press},
   Year = {2007},
   Month = {August},
   url = {http://dx.doi.org/10.1145/1273496.1273630},
   Abstract = {In multi-task learning our goal is to design regression or
             classification models for each of the tasks and
             appropriately share information between tasks. A Dirichlet
             process (DP) prior can be used to encourage task clustering.
             However, the DP prior does not allow local clustering of
             tasks with respect to a subset of the feature vector without
             making independence assumptions. Motivated by this problem,
             we develop a new multitask-learning prior, termed the matrix
             stick-breaking process (MSBP), which encourages cross-task
             sharing of data. However, the MSBP allows separate
             clustering and borrowing of information for the different
             feature components. This is important when tasks are more
             closely related for certain features than for others.
             Bayesian inference proceeds by a Gibbs sampling algorithm
             and the approach is illustrated using a simulated example
             and a multi-national application.},
   Doi = {10.1145/1273496.1273630},
   Key = {fds257979}
}
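
%% Note: The stick-breaking construction underlying the MSBP forms
%% weights by repeatedly breaking Beta-distributed fractions off the
%% remaining stick: pi_k = V_k * prod_{l<k} (1 - V_l) with V_k ~
%% Beta(1, alpha). A minimal sketch of the basic (non-matrix)
%% construction.

import numpy as np

def stick_breaking(alpha, k, rng=None):
    """First k stick-breaking weights of a Dirichlet process draw."""
    rng = rng or np.random.default_rng(5)
    v = rng.beta(1.0, alpha, size=k)
    remaining = np.concatenate(([1.0], np.cumprod(1.0 - v)[:-1]))
    return v * remaining

weights = stick_breaking(alpha=2.0, k=10)
print(weights, weights.sum())   # sums to < 1; the tail holds the rest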

@article{fds257980,
   Author = {Ni, K and Carin, L and Dunson, D},
   Title = {Multi-task learning for sequential data via iHMMs and the
             nested Dirichlet process},
   Journal = {Acm International Conference Proceeding Series},
   Volume = {227},
   Pages = {689-696},
   Publisher = {ACM Press},
   Year = {2007},
   Month = {August},
   url = {http://dx.doi.org/10.1145/1273496.1273583},
   Abstract = {A new hierarchical nonparametric Bayesian model is proposed
             for the problem of multitask learning (MTL) with sequential
             data. Sequential data are typically modeled with a hidden
             Markov model (HMM), for which one often must choose an
             appropriate model structure (number of states) before
             learning. Here we model sequential data from each task with
             an infinite hidden Markov model (iHMM), avoiding the problem
             of model selection. The MTL for iHMMs is implemented by
             imposing a nested Dirichlet process (nDP) prior on the base
             distributions of the iHMMs. The nDP-iHMM MTL method allows
             us to perform task-level clustering and data-level
             clustering simultaneously, with which the learning for
             individual iHMMs is enhanced and between-task similarities
             are learned. Learning and inference for the nDP-iHMM MTL are
             based on a Gibbs sampler. The effectiveness of the framework
             is demonstrated using synthetic data as well as real music
             data.},
   Doi = {10.1145/1273496.1273583},
   Key = {fds257980}
}

@article{fds258067,
   Author = {Kinney, SK and Dunson, DB},
   Title = {Fixed and random effects selection in linear and logistic
             models.},
   Journal = {Biometrics},
   Volume = {63},
   Number = {3},
   Pages = {690-698},
   Year = {2007},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/17403104},
   Abstract = {We address the problem of selecting which variables should
             be included in the fixed and random components of logistic
             mixed effects models for correlated data. A fully Bayesian
             variable selection is implemented using a stochastic search
             Gibbs sampler to estimate the exact model-averaged posterior
             distribution. This approach automatically identifies subsets
             of predictors having nonzero fixed effect coefficients or
             nonzero random effects variance, while allowing uncertainty
             in the model selection process. Default priors are proposed
             for the variance components and an efficient parameter
             expansion Gibbs sampler is developed for posterior
             computation. The approach is illustrated using simulated
             data and an epidemiologic example.},
   Doi = {10.1111/j.1541-0420.2007.00771.x},
   Key = {fds258067}
}
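
% A generic form of the stochastic search variable selection prior sketched
% in the abstract above; schematic only, since the paper's default priors and
% parameter-expansion details differ:
%
%   \beta_j \sim (1-\gamma_j)\,\delta_0 + \gamma_j\,N(0,\tau_j^2),
%   \qquad \gamma_j \sim \mathrm{Bernoulli}(p_j)
%
% The Gibbs sampler updates each inclusion indicator \gamma_j from its
% conditional posterior, yielding model-averaged inclusion probabilities
% directly; an analogous zero-inflated prior applies to each random-effects
% variance.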

@article{fds258072,
   Author = {Bigelow, JL and Dunson, DB},
   Title = {Bayesian adaptive regression splines for hierarchical
             data.},
   Journal = {Biometrics},
   Volume = {63},
   Number = {3},
   Pages = {724-732},
   Year = {2007},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/17403106},
   Abstract = {This article considers methodology for hierarchical
             functional data analysis, motivated by studies of
             reproductive hormone profiles in the menstrual cycle.
             Current methods standardize the cycle lengths and ignore the
             timing of ovulation within the cycle, both of which are
             biologically informative. Methods are needed that avoid
             standardization, while flexibly incorporating information on
             covariates and the timing of reference events, such as
             ovulation and onset of menses. In addition, it is necessary
             to account for within-woman dependency when data are
             collected for multiple cycles. We propose an approach based
             on a hierarchical generalization of Bayesian multivariate
             adaptive regression splines. Our formulation allows for an
             unknown set of basis functions characterizing the
             population-averaged and woman-specific trajectories in
             relation to covariates. A reversible jump Markov chain Monte
             Carlo algorithm is developed for posterior computation.
             Applying the methods to data from the North Carolina Early
             Pregnancy Study, we investigate differences in urinary
             progesterone profiles between conception and nonconception
             cycles.},
   Doi = {10.1111/j.1541-0420.2007.00761.x},
   Key = {fds258072}
}

@article{fds257948,
   Author = {Dunson, DB},
   Title = {Bayesian methods for latent trait modelling of longitudinal
             data.},
   Journal = {Statistical Methods in Medical Research},
   Volume = {16},
   Number = {5},
   Pages = {399-415},
   Year = {2007},
   Month = {October},
   ISSN = {0962-2802},
   url = {http://dx.doi.org/10.1177/0962280206075309},
   Abstract = {Latent trait models have long been used in the social
             science literature for studying variables that can only be
             measured indirectly through multiple items. However, such
             models are also very useful in accounting for correlation in
             multivariate and longitudinal data, particularly when
             outcomes have mixed measurement scales. Bayesian methods
             implemented with Markov chain Monte Carlo provide a flexible
             framework for routine fitting of a broad class of latent
             variable (LV) models, including very general structural
             equation models. However, in considering LV models, a number
             of challenging issues arise, including identifiability,
             confounding between the mean and variance, uncertainty in
             different aspects of the model, and difficulty in
             computation. Motivated by the problem of modelling
             multidimensional longitudinal data, this article reviews the
             recent literature, provides some recommendations and
             highlights areas in need of additional research, focusing on
             methods for model uncertainty.},
   Doi = {10.1177/0962280206075309},
   Key = {fds257948}
}

@article{fds258061,
   Author = {Scarpa, B and Dunson, DB and Giacchi, E},
   Title = {Bayesian selection of optimal rules for timing intercourse
             to conceive by using calendar and mucus.},
   Journal = {Fertility and Sterility},
   Volume = {88},
   Number = {4},
   Pages = {915-924},
   Year = {2007},
   Month = {October},
   ISSN = {0015-0282},
   url = {http://dx.doi.org/10.1016/j.fertnstert.2006.12.017},
   Abstract = {OBJECTIVE: To find optimal clinical rules that maximize the
             probability of conception while limiting the number of
             intercourse days required. DESIGN: Multicenter prospective
             study. Women were followed prospectively while they kept
             daily records of menstrual bleeding, intercourse, and mucus
             symptom characteristics. In some cycles, women sought to
             conceive, whereas in other cycles, they sought to avoid
             pregnancy. SETTING: Four centers providing services on
             fertility awareness. PATIENT(S): One hundred ninety-one
             healthy women using the Billings Ovulation Method. Women
             were invited to enroll by their instructors if they
             satisfied the entry criteria. We excluded cycles in which
             mucus was not recorded on a day with intercourse.
             INTERVENTION(S): None. MAIN OUTCOME MEASURE(S): Clinically
             identified pregnancies. There were 161 clinically identified
             pregnancies in 2,536 menstrual cycles from 191 women.
             RESULT(S): Our approach relies on a statistical model that
             relates daily predictors, such as type of mucus symptom, to
             the day-specific probabilities of conception. By using
             Bayesian methods to search over a large set of possible
             clinical rules, focusing on rules based on calendar and
             mucus, we found that simple rules that are based on days
             within the midcycle calendar interval that also have the
             most fertile-type mucus symptom present have high utility.
             CONCLUSION(S): Couples can shorten their time to pregnancy
             efficiently by timing intercourse on days that the most
             fertile-type mucus symptom is observed at the
             vulva.},
   Doi = {10.1016/j.fertnstert.2006.12.017},
   Key = {fds258061}
}

@article{fds258063,
   Author = {Pennell, ML and Dunson, DB},
   Title = {Fitting semiparametric random effects models to large data
             sets.},
   Journal = {Biostatistics (Oxford, England)},
   Volume = {8},
   Number = {4},
   Pages = {821-834},
   Year = {2007},
   Month = {October},
   ISSN = {1465-4644},
   url = {http://dx.doi.org/10.1093/biostatistics/kxm008},
   Abstract = {For large data sets, it can be difficult or impossible to
             fit models with random effects using standard algorithms due
             to memory limitations or high computational burdens. In
             addition, it would be advantageous to use the abundant
             information to relax assumptions, such as normality of
             random effects. Motivated by data from an epidemiologic
             study of childhood growth, we propose a 2-stage method for
             fitting semiparametric random effects models to longitudinal
             data with many subjects. In the first stage, we use a
             multivariate clustering method to identify G ≪ N groups of
             subjects whose data have no scientifically important
             differences, as defined by subject matter experts. Then, in
             stage 2, group-specific random effects are assumed to come
             from an unknown distribution, which is assigned a Dirichlet
             process prior, further clustering the groups from stage 1.
             We use our approach to model the effects of maternal smoking
             during pregnancy on growth in 17,518 girls.},
   Doi = {10.1093/biostatistics/kxm008},
   Key = {fds258063}
}

@article{fds257866,
   Author = {Palomo, J and Dunson, DB and Bollen, K},
   Title = {Bayesian Structural Equation Modeling},
   Pages = {163-188},
   Publisher = {Elsevier},
   Year = {2007},
   Month = {December},
   url = {http://dx.doi.org/10.1016/B978-044452044-9/50011-2},
   Abstract = {This chapter focuses on Bayesian structural equation
             modeling. Structural equation models (SEMs) with latent
             variables are routinely used in social science research, and
             are of increasing importance in biomedical applications.
             Standard practice in implementing SEMs relies on frequentist
             methods. The chapter provides a simple and concise
             description of an alternative Bayesian approach. A
             description of the Bayesian specification of SEMs, and an
             outline of a Gibbs sampling strategy for model fitting is
             also presented. Bayesian inferences are illustrated through
             an industrialization and democratization case study. The
             Bayesian approach has some distinct advantages, due to the
             availability of samples from the joint posterior
             distribution of the model parameters and latent variables,
             which are highlighted in the chapter. These posterior
             samples provide important information not contained in the
             measurement and structural parameters. © 2007 Elsevier B.V.
             All rights reserved.},
   Doi = {10.1016/B978-044452044-9/50011-2},
   Key = {fds257866}
}
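
% Schematically, the latent-variable SEM discussed in this chapter pairs a
% measurement model with a structural model (notation illustrative, not the
% chapter's):
%
%   y_i = \nu + \Lambda\,\eta_i + \epsilon_i, \qquad \epsilon_i \sim N(0,\Sigma),
%   \qquad \eta_i = B\,\eta_i + \zeta_i, \qquad \zeta_i \sim N(0,\Psi)
%
% The Gibbs sampler alternates between drawing the latent variables \eta_i
% given the parameters and the parameters given the latent variables,
% yielding the joint posterior samples highlighted in the abstract.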

@article{fds258062,
   Author = {Cai, B and Dunson, DB},
   Title = {Bayesian multivariate isotonic regression splines:
             Applications to carcinogenicity studies},
   Journal = {Journal of the American Statistical Association},
   Volume = {102},
   Number = {480},
   Pages = {1158-1171},
   Publisher = {Informa UK Limited},
   Year = {2007},
   Month = {December},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214506000000942},
   Abstract = {In many applications, interest focuses on assessing the
             relationship between a predictor and a multivariate outcome
             variable, and there may be prior knowledge about the shape
             of the regression curves. For example, regression functions
             that relate dose of a possible risk factor to different
             adverse outcomes can often be assumed to be nondecreasing.
             In such cases, interest focuses on (1) assessing evidence of
             an overall adverse effect, (2) determining which outcomes
             are most affected, and (3) estimating outcome-specific
             regression curves. This article proposes a Bayesian approach
             for addressing this problem, motivated by multisite tumor
             data from carcinogenicity experiments. A multivariate
             smoothing spline model is specified, that accommodates
             dependency in the multiple curves through a hierarchical
             Markov random field prior for the basis coefficients, while
             also allowing for residual correlation. A Gibbs sampler is
             proposed for posterior computation, and the approach is
             applied to data on body weight and tumor
             occurrence.},
   Doi = {10.1198/016214506000000942},
   Key = {fds258062}
}

@article{fds257949,
   Author = {Dunson, DB},
   Title = {Comment},
   Journal = {Journal of the American Statistical Association},
   Volume = {103},
   Number = {481},
   Pages = {40-41},
   Publisher = {Informa UK Limited},
   Year = {2008},
   Month = {March},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214507000001436},
   Doi = {10.1198/016214507000001436},
   Key = {fds257949}
}

@article{fds258066,
   Author = {Dunson, DB and Xue, Y and Carin, L},
   Title = {The matrix stick-breaking process: Flexible Bayes
             meta-analysis},
   Journal = {Journal of the American Statistical Association},
   Volume = {103},
   Number = {481},
   Pages = {317-327},
   Publisher = {Informa UK Limited},
   Year = {2008},
   Month = {March},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214507000001364},
   Abstract = {In analyzing data from multiple related studies, it often is
             of interest to borrow information across studies and to
             cluster similar studies. Although parametric hierarchical
             models are commonly used, of concern is sensitivity to the
             form chosen for the random-effects distribution. A Dirichlet
             process (DP) prior can allow the distribution to be unknown,
             while clustering studies; however, the DP does not allow
             local clustering of studies with respect to a subset of the
             coefficients without making independence assumptions.
             Motivated by this problem, we propose a matrix
             stick-breaking process (MSBP) as a prior for a matrix of
             random probability measures. Properties of the MSBP are
             considered, and methods are developed for posterior
             computation using Markov chain Monte Carlo. Using the MSBP
             as a prior for a matrix of study-specific regression
             coefficients, we demonstrate advantages over parametric
             modeling in simulated examples. The methods are further
             illustrated using a multinational uterotrophic bioassay
             study.},
   Doi = {10.1198/016214507000001364},
   Key = {fds258066}
}
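
% Up to notation, the MSBP assigns each cell (j,k) of a matrix of random
% probability measures stick-breaking weights that multiply row-specific and
% column-specific beta sticks:
%
%   P_{jk} = \sum_{h=1}^{\infty} \pi_{jkh}\,\delta_{\Theta_h}, \qquad
%   \pi_{jkh} = V_{jh}\,W_{kh} \prod_{l<h} (1 - V_{jl}\,W_{kl}),
%
% with V_{jh} and W_{kh} independent beta random variables, so information
% can be shared within rows, within columns, or globally.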

@article{fds258059,
   Author = {Dunson, DB and Herring, AH and Engel, SM},
   Title = {Bayesian selection and clustering of polymorphisms in
             functionally related genes},
   Journal = {Journal of the American Statistical Association},
   Volume = {103},
   Number = {482},
   Pages = {534-546},
   Publisher = {Informa UK Limited},
   Year = {2008},
   Month = {June},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214507000000554},
   Abstract = {In epidemiologic studies, there is often interest in
             assessing the relationship between polymorphisms in
             functionally related genes and a health outcome. For each
             candidate gene, single nucleotide polymorphism (SNP) data
             are collected at a number of locations, resulting in a large
             number of possible genotypes. Because instabilities can
             result in analyses that include all the SNPs, dimensionality
             is typically reduced by conducting single SNP analyses or
             attempting to identify haplotypes. This article proposes an
             alternative Bayesian approach for reducing dimensionality. A
             multilevel Dirichlet process prior is used for the
             distribution of the SNP-specific regression coefficients
             within genes, incorporating a variable selection-type
             mixture structure to allow SNPs with no effect. This
             structure allows simultaneous selection of important SNPs
             and soft clustering of SNPs having similar impact on the
             health outcome. The methods are illustrated using data from
             a study of pro- and anti-inflammatory cytokine polymorphisms
             and spontaneous preterm birth.},
   Doi = {10.1198/016214507000000554},
   Key = {fds258059}
}

@article{fds258064,
   Author = {Pennell, ML and Dunson, DB},
   Title = {Nonparametric bayes testing of changes in a response
             distribution with an ordinal predictor.},
   Journal = {Biometrics},
   Volume = {64},
   Number = {2},
   Pages = {413-423},
   Year = {2008},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2007.00885.x},
   Abstract = {In certain biomedical studies, one may anticipate changes in
             the shape of a response distribution across the levels of an
             ordinal predictor. For instance, in toxicology studies,
             skewness and modality might change as dose increases. To
             address this issue, we propose a Bayesian nonparametric
             method for testing for distribution changes across an
             ordinal predictor. Using a dynamic mixture of Dirichlet
             processes, we allow the response distribution to change
             flexibly at each level of the predictor. In addition, by
             assigning mixture priors to the hyperparameters, we can
             obtain posterior probabilities of no effect of the predictor
             and identify the lowest dose level for which there is an
             appreciable change in distribution. The method also provides
             a natural framework for performing tests across multiple
             outcomes. We apply our method to data from a genotoxicity
             experiment.},
   Doi = {10.1111/j.1541-0420.2007.00885.x},
   Key = {fds258064}
}

@article{fds258065,
   Author = {Dunson, DB and Park, JH},
   Title = {Kernel stick-breaking processes},
   Journal = {Biometrika},
   Volume = {95},
   Number = {2},
   Pages = {307-323},
   Publisher = {Oxford University Press (OUP)},
   Year = {2008},
   Month = {June},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asn012},
   Abstract = {We propose a class of kernel stick-breaking processes for
             uncountable collections of dependent random probability
             measures. The process is constructed by first introducing an
             infinite sequence of random locations. Independent random
             probability measures and beta-distributed random weights are
             assigned to each location. Predictor-dependent random
             probability measures are then constructed by mixing over the
             locations, with stick-breaking probabilities expressed as a
             kernel multiplied by the beta weights. Some theoretical
             properties of the process are described, including a
             covariate-dependent prediction rule. A retrospective Markov
             chain Monte Carlo algorithm is developed for posterior
             computation, and the methods are illustrated using a
             simulated example and an epidemiological application.© US
             Government/Department of Health and Human Services
             2008.},
   Doi = {10.1093/biomet/asn012},
   Key = {fds258065}
}
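
% In the notation of the abstract above, the kernel stick-breaking
% construction is, schematically,
%
%   G_x = \sum_{h=1}^{\infty} \pi_h(x)\,G_h^*, \qquad
%   \pi_h(x) = V_h\,K(x,\Gamma_h) \prod_{l<h} \{1 - V_l\,K(x,\Gamma_l)\},
%
% with random locations \Gamma_h, beta-distributed weights V_h, a kernel K
% bounded above by one, and independent random probability measures G_h^*.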

@article{fds257950,
   Author = {Elliott, L and Henderson, J and Northstone, K and Chiu, GY and Dunson,
             D and London, SJ},
   Title = {Prospective study of breast-feeding in relation to wheeze,
             atopy, and bronchial hyperresponsiveness in the Avon
             Longitudinal Study of Parents and Children
             (ALSPAC).},
   Journal = {Journal of Allergy and Clinical Immunology},
   Volume = {122},
   Number = {1},
   Pages = {49-54.e3},
   Year = {2008},
   Month = {July},
   ISSN = {0091-6749},
   url = {http://dx.doi.org/10.1016/j.jaci.2008.04.001},
   Abstract = {Breast-feeding clearly protects against early wheezing, but
             recent data suggest that it might increase later risk of
             atopic disease and asthma. We sought to examine the
             relationship between breast-feeding and later asthma and
             allergy outcomes by using data from the Avon Longitudinal
             Study of Parents and Children, a large birth cohort in the
             United Kingdom. We used adjusted logistic regression models
             to evaluate the association between breast-feeding and atopy
             at age 7 years, bronchial responsiveness to methacholine at
             age 8 years, and wheeze at ages 3 and 7 1/2 years. Bayesian
             methods were used to assess the possibility of bias caused
             by an influence of early wheezing on the duration of
             breast-feeding, as well as selection bias.Breast-feeding was
             protective for wheeze in the first 3 years of life (odds
             ratio [OR] of 0.80 [95% CI, 0.70-0.90] for ≥6 months
             relative to never) but not wheeze (OR, 0.98; 95% CI,
             0.79-1.22), atopy (OR, 1.12; 95% CI, 0.92-1.35), or
             bronchial hyperresponsiveness (OR, 1.07; 95% CI, 0.82-1.40)
             at ages 7 to 8 years. Bayesian models adjusting for the
             longer duration of breast-feeding among children with
             wheezing in early infancy produced virtually identical
             results. We did not find consistent evidence for either a
             deleterious effect or a protective effect of breast-feeding
             on later risk of allergic disease in a large prospective
             birth cohort of children with objective outcome measures and
             extensive data on potential confounders and effect
             modifiers. Neither reverse causation nor loss to follow-up
             appears to have materially biased our results.},
   Doi = {10.1016/j.jaci.2008.04.001},
   Key = {fds257950}
}

@article{fds258057,
   Author = {Ni, K and Paisley, J and Carin, L and Dunson, D},
   Title = {Multi-task learning for analyzing and sorting large
             databases of sequential data},
   Journal = {Ieee Transactions on Signal Processing},
   Volume = {56},
   Number = {8 II},
   Pages = {3918-3931},
   Publisher = {Institute of Electrical and Electronics Engineers
             (IEEE)},
   Year = {2008},
   Month = {August},
   ISSN = {1053-587X},
   url = {http://dx.doi.org/10.1109/TSP.2008.924798},
   Abstract = {A new hierarchical nonparametric Bayesian framework is
             proposed for the problem of multi-task learning (MTL) with
             sequential data. The models for multiple tasks, each
             characterized by sequential data, are learned jointly, and
             the intertask relationships are obtained simultaneously.
             This MTL setting is used to analyze and sort large databases
             composed of sequential data, such as music clips. Within
             each data set, we represent the sequential data with an
             infinite hidden Markov model (iHMM), avoiding the problem of
             model selection (selecting a number of states). Across the
             data sets, the multiple iHMMs are learned jointly in a MTL
             setting, employing a nested Dirichlet process (nDP). The
             nDP-iHMM MTL method allows simultaneous task-level and
             data-level clustering, with which the individual iHMMs are
             enhanced and the between-task similarities are learned.
             Therefore, in addition to improved learning of each of the
             models via appropriate data sharing, the learned sharing
             mechanisms are used to infer interdata relationships of
             interest for data search. Specifically, the MTL-learned
             task-level sharing mechanisms are used to define the
             affinity matrix in a graph-diffusion sorting framework. To
             speed up the MCMC inference for large databases, the
             nDP-iHMM is truncated to yield a nested Dirichlet-distribution
             based HMM representation, which accommodates fast
             variational Bayesian (VB) analysis for large-scale
             inference, and the effectiveness of the framework is
             demonstrated using a database composed of 2500 digital music
             pieces. © 2008 IEEE.},
   Doi = {10.1109/TSP.2008.924798},
   Key = {fds258057}
}

@article{fds257951,
   Author = {Rodríguez, A and Dunson, DB and Gelfand, AE},
   Title = {The nested Dirichlet process},
   Journal = {Journal of the American Statistical Association},
   Volume = {103},
   Number = {483},
   Pages = {1131-1144},
   Publisher = {Informa UK Limited},
   Year = {2008},
   Month = {September},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214508000000553},
   Abstract = {In multicenter studies, subjects in different centers may
             have different outcome distributions. This article is
             motivated by the problem of nonparametric modeling of these
             distributions, borrowing information across centers while
             also allowing centers to be clustered. Starting with a
             stick-breaking representation of the Dirichlet process (DP),
             we replace the random atoms with random probability measures
             drawn from a DP. This results in a nested DP prior, which
             can be placed on the collection of distributions for the
             different centers, with centers drawn from the same DP
             component automatically clustered together. Theoretical
             properties are discussed, and an efficient Markov chain
             Monte Carlo algorithm is developed for computation. The
             methods are illustrated using a simulation study and an
             application to quality of care in U.S. hospitals.},
   Doi = {10.1198/016214508000000553},
   Key = {fds257951}
}
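
% Schematically, the nested Dirichlet process replaces the atoms of a
% stick-breaking DP with random measures that are themselves DP draws:
%
%   G_j \sim Q, \qquad Q = \sum_{k=1}^{\infty} \pi_k\,\delta_{G_k^*}, \qquad
%   G_k^* \sim \mathrm{DP}(\beta H),
%
% with \pi_k built from Beta(1,\alpha) sticks; centers j and j' cluster
% automatically whenever G_j = G_{j'}.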

@article{fds257952,
   Author = {Rodríguez, A and Dunson, DB and Gelfand, AE},
   Title = {The nested Dirichlet process: Rejoinder},
   Journal = {Journal of the American Statistical Association},
   Volume = {103},
   Number = {483},
   Pages = {1153-1154},
   Publisher = {Informa UK Limited},
   Year = {2008},
   Month = {September},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214508000000616},
   Doi = {10.1198/016214508000000616},
   Key = {fds257952}
}

@article{fds257981,
   Author = {Qi, Y and Liu, D and Dunson, D and Carin, L},
   Title = {Multi-task compressive sensing with dirichlet process
             priors},
   Journal = {Proceedings of the 25th International Conference on Machine
             Learning},
   Pages = {768-775},
   Year = {2008},
   Month = {November},
   Abstract = {Compressive sensing (CS) is an emerging field that, under
             appropriate conditions, can significantly reduce the number
             of measurements required for a given signal. In many
             applications, one is interested in multiple signals that may
             be measured in multiple CS-type measurements, where here
             each signal corresponds to a sensing "task". In this paper
             we propose a novel multitask compressive sensing framework
             based on a Bayesian formalism, where a Dirichlet process
             (DP) prior is employed, yielding a principled means of
             simultaneously inferring the appropriate sharing mechanisms
             as well as CS inversion for each task. A variational
             Bayesian (VB) inference algorithm is employed to estimate
             the full posterior on the model parameters. Copyright 2008
             by the author(s)/owner(s).},
   Key = {fds257981}
}

@article{fds257982,
   Author = {An, Q and Wang, C and Shterev, I and Wang, E and Carin, L and Dunson,
             DB},
   Title = {Hierarchical kernel stick-breaking process for multi-task
             image analysis},
   Journal = {Proceedings of the 25th International Conference on Machine
             Learning},
   Pages = {17-24},
   Year = {2008},
   Month = {November},
   Abstract = {The kernel stick-breaking process (KSBP) is employed to
             segment general imagery, imposing the condition that patches
             (small blocks of pixels) that are spatially proximate are
             more likely to be associated with the same cluster
             (segment). The number of clusters is not set a priori and is
             inferred from the hierarchical Bayesian model. Further, KSBP
             is integrated with a shared Dirichlet process prior to
             simultaneously model multiple images, inferring their
             inter-relationships. This latter application may be useful
             for sorting and learning relationships between multiple
             images. The Bayesian inference algorithm is based on a
             hybrid of variational Bayesian analysis and local sampling.
             In addition to providing details on the model and associated
             inference framework, example results are presented for
             several image-analysis problems. Copyright 2008 by the
             author(s)/owner(s).},
   Key = {fds257982}
}

@article{fds257983,
   Author = {Ren, L and Dunson, DB and Carin, L},
   Title = {The dynamic hierarchical Dirichlet process},
   Journal = {Proceedings of the 25th International Conference on Machine
             Learning},
   Pages = {824-831},
   Year = {2008},
   Month = {November},
   Abstract = {The dynamic hierarchical Dirichlet process (dHDP) is
             developed to model the time-evolving statistical properties
             of sequential data sets. The data collected at any time
             point are represented via a mixture associated with an
             appropriate underlying model, in the framework of HDP. The
             statistical properties of data collected at consecutive time
             points are linked via a random parameter that controls their
             probabilistic similarity. The sharing mechanisms of the
             time-evolving data are derived, and a relatively simple
             Markov Chain Monte Carlo sampler is developed. Experimental
             results are presented to demonstrate the model. Copyright
             2008 by the author(s)/owner(s).},
   Key = {fds257983}
}
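
% Schematically (indexing conventions may differ from the paper's), the dHDP
% links the mixing measure at time j to its predecessor through a random
% innovation weight:
%
%   G_j = (1 - w_j)\,G_{j-1} + w_j\,H_j, \qquad w_j \sim \mathrm{Beta}(a_j, b_j),
%
% where H_j is an innovation measure drawn within the HDP framework; small
% w_j keeps consecutive time points probabilistically similar, while w_j
% near one permits the abrupt changes the model is designed to accommodate.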

@article{fds258052,
   Author = {Dunson, DB and Peddada, SD},
   Title = {Bayesian nonparametric inference on stochastic
             ordering},
   Journal = {Biometrika},
   Volume = {95},
   Number = {4},
   Pages = {859-874},
   Publisher = {Oxford University Press (OUP)},
   Year = {2008},
   Month = {December},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asn043},
   Abstract = {We consider Bayesian inference about collections of unknown
             distributions subject to a partial stochastic ordering. To
             address problems in testing of equalities between groups and
             estimation of group-specific distributions, we propose
             classes of restricted dependent Dirichlet process priors.
             These priors have full support in the space of
             stochastically ordered distributions, and can be used for
             collections of unknown mixture distributions to obtain a
             flexible class of mixture models. Theoretical properties are
             discussed, efficient methods are developed for posterior
             computation using Markov chain Monte Carlo simulation and
             the methods are illustrated using data from a study of DNA
             damage and repair. © 2008 U.S. Government/Department of
             Health and Human Services.},
   Doi = {10.1093/biomet/asn043},
   Key = {fds258052}
}
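
% For reference, the stochastic ordering constraint in question: F_1 is
% stochastically smaller than F_2 (written F_1 \preceq F_2) when
%
%   F_1(t) \ge F_2(t) \quad \text{for all } t,
%
% equivalently \Pr(X_1 > t) \le \Pr(X_2 > t) for all t; the restricted
% dependent Dirichlet process priors have full support on collections of
% distributions obeying such (partial) orderings.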

@article{fds258053,
   Author = {Dunson, DB and Herring, AH and Siega-Riz, AM},
   Title = {Bayesian inference on changes in response densities over
             predictor clusters},
   Journal = {Journal of the American Statistical Association},
   Volume = {103},
   Number = {484},
   Pages = {1508-1517},
   Publisher = {Informa UK Limited},
   Year = {2008},
   Month = {December},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/016214508000001039},
   Abstract = {In epidemiology, it often is of interest to assess how
             individuals with different trajectories over time in an
             environmental exposure or biomarker differ with respect to a
             continuous response. For ease in interpretation and
             presentation of results, epidemiologists typically
             categorize predictors before analysis. To extend this
             approach to time-varying predictors, individuals can be
             clustered by their predictor trajectory, with the cluster
             index included as a predictor in a regression model for the
             response. This article develops a semiparametric Bayes
             approach that avoids assuming a prespecified number of
             clusters and allows the response to vary nonparametrically
             over predictor clusters. This methodology is motivated by
             interest in relating trajectories in weight gain during
             pregnancy to the distribution of birth weight adjusted for
             gestational age at delivery. In this setting, the proposed
             approach allows the tails of the birth weight density to
             vary flexibly over weight gain clusters. © 2008 American
             Statistical Association.},
   Doi = {10.1198/016214508000001039},
   Key = {fds258053}
}

@article{fds258031,
   Author = {Armagan, A and Dunson, DB},
   Title = {Sparse variational analysis of large longitudinal data
             sets},
   Journal = {Statistics & Probability Letters},
   Year = {2009},
   Key = {fds258031}
}

@article{fds257953,
   Author = {Rodríguez, A and Dunson, DB and Gelfand, AE},
   Title = {Bayesian Nonparametric Functional Data Analysis Through
             Density Estimation.},
   Journal = {Biometrika},
   Volume = {96},
   Number = {1},
   Pages = {149-162},
   Year = {2009},
   Month = {January},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asn054},
   Abstract = {In many modern experimental settings, observations are
             obtained in the form of functions, and interest focuses on
             inferences on a collection of such functions. We propose a
             hierarchical model that allows us to simultaneously estimate
             multiple curves nonparametrically by using dependent
             Dirichlet Process mixtures of Gaussians to characterize the
             joint distribution of predictors and outcomes. Function
             estimates are then induced through the conditional
             distribution of the outcome given the predictors. The
             resulting approach allows for flexible estimation and
             clustering, while borrowing information across curves. We
             also show that the function estimates we obtain are
             consistent on the space of integrable functions. As an
             illustration, we consider an application to the analysis of
             Conductivity and Temperature at Depth data in the north
             Atlantic.},
   Doi = {10.1093/biomet/asn054},
   Key = {fds257953}
}
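
% The induced-regression device described in the abstract can be sketched
% with a generic joint mixture of Gaussians (the paper's dependent DP
% specification is richer): modeling
%
%   f(x,y) = \sum_{h=1}^{\infty} \pi_h\,N\big((x,y);\,\mu_h,\,\Sigma_h\big)
%
% induces f(y \mid x) = \sum_h w_h(x)\,N(y;\,\mu_{h,y|x}(x),\,\sigma_{h,y|x}^2)
% with weights w_h(x) \propto \pi_h\,N(x;\,\mu_{h,x},\,\Sigma_{h,xx}), and the
% function estimate follows from the conditional mean E(y \mid x).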

@article{fds258029,
   Author = {Du, L and Ren, L and Dunson, DB and Carin, L},
   Title = {A Bayesian Model for Simultaneous Image Clustering,
             Annotation and Object Segmentation.},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {2009},
   Pages = {486-494},
   Year = {2009},
   Month = {January},
   Abstract = {A non-parametric Bayesian model is proposed for processing
             multiple images. The analysis employs image features and,
             when present, the words associated with accompanying
             annotations. The model clusters the images into classes, and
             each image is segmented into a set of objects, also allowing
             the opportunity to assign a word to each object (localized
             labeling). Each object is assumed to be represented as a
             heterogeneous mix of components, with this realized via
             mixture models linking image features to object types. The
             number of image classes, number of object types, and the
             characteristics of the object-feature mixture models are
             inferred nonparametrically. To constitute spatially
             contiguous objects, a new logistic stick-breaking process is
             developed. Inference is performed efficiently via
             variational Bayesian analysis, with example results
             presented on two image databases.},
   Key = {fds258029}
}

@article{fds258047,
   Author = {Rodriguez, A and Dunson, DB and Taylor, J},
   Title = {Bayesian hierarchically weighted finite mixture models for
             samples of distributions.},
   Journal = {Biostatistics (Oxford, England)},
   Volume = {10},
   Number = {1},
   Pages = {155-171},
   Year = {2009},
   Month = {January},
   ISSN = {1465-4644},
   url = {http://dx.doi.org/10.1093/biostatistics/kxn024},
   Abstract = {Finite mixtures of Gaussian distributions are known to
             provide an accurate approximation to any unknown density.
             Motivated by DNA repair studies in which data are collected
             for samples of cells from different individuals, we propose
             a class of hierarchically weighted finite mixture models.
             The modeling framework incorporates a collection of k
             Gaussian basis distributions, with the individual-specific
             response densities expressed as mixtures of these bases. To
             allow heterogeneity among individuals and predictor effects,
             we model the mixture weights, while treating the basis
             distributions as unknown but common to all distributions.
             This results in a flexible hierarchical model for samples of
             distributions. We consider analysis of variance-type
             structures and a parsimonious latent factor representation,
             which leads to simplified inferences on non-Gaussian
             covariance structures. Methods for posterior computation are
             developed, and the model is used to select genetic
             predictors of baseline DNA damage, susceptibility to induced
             damage, and rate of repair.},
   Doi = {10.1093/biostatistics/kxn024},
   Key = {fds258047}
}
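
% In symbols, the hierarchically weighted finite mixture keeps K Gaussian
% bases common to all individuals and lets only the weights vary (notation
% illustrative):
%
%   f_i(y) = \sum_{k=1}^{K} w_{ik}\,N(y;\,\mu_k,\,\sigma_k^2), \qquad
%   \sum_{k=1}^{K} w_{ik} = 1,
%
% with the individual-specific weight vectors modeled through ANOVA-type or
% latent factor structures to capture heterogeneity and predictor effects.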

@article{fds258054,
   Author = {Dunson, DB},
   Title = {Nonparametric Bayes local partition models for random
             effects.},
   Journal = {Biometrika},
   Volume = {96},
   Number = {2},
   Pages = {249-262},
   Year = {2009},
   Month = {January},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asp021},
   Abstract = {This paper focuses on the problem of choosing a prior for an
             unknown random effects distribution within a Bayesian
             hierarchical model. The goal is to obtain a sparse
             representation by allowing a combination of global and local
             borrowing of information. A local partition process prior is
             proposed, which induces dependent local clustering. Subjects
             can be clustered together for a subset of their parameters,
             and one learns about similarities between subjects
             increasingly as parameters are added. Some basic properties
             are described, including simple two-parameter expressions
             for marginal and conditional clustering probabilities. A
             slice sampler is developed which bypasses the need to
             approximate the countably infinite random measure in
             performing posterior computation. The methods are
             illustrated using simulation examples, and an application to
             hormone trajectory data.},
   Doi = {10.1093/biomet/asp021},
   Key = {fds258054}
}

@article{fds258028,
   Author = {Ji, S and Dunson, D and Carin, L},
   Title = {Multitask compressive sensing},
   Journal = {Ieee Transactions on Signal Processing},
   Volume = {57},
   Number = {1},
   Pages = {92-106},
   Publisher = {Institute of Electrical and Electronics Engineers
             (IEEE)},
   Year = {2009},
   Month = {January},
   ISSN = {1053-587X},
   url = {http://dx.doi.org/10.1109/TSP.2008.2005866},
   Abstract = {Compressive sensing (CS) is a framework whereby one performs
             N nonadaptive measurements to constitute a vector v ∈ ℝ^N,
             with v used to recover an approximation û ∈ ℝ^M to a
             desired signal u ∈ ℝ^M with N ≪ M; this is performed under
             the assumption that u is sparse in the basis represented by
             the matrix Ψ ∈ ℝ^{M×M}. It has been demonstrated that with
             appropriate design of the compressive measurements used to
             define v, the decompressive mapping v → û may be performed
             with error ‖u − û‖₂² having asymptotic properties analogous
             to those of the best transform-coding algorithm applied in
             the basis Ψ. The mapping v → û constitutes an inverse
             problem, often solved using ℓ1 regularization or related
             techniques. In most previous research, if L > 1 sets of
             compressive measurements {v_i}_{i=1,...,L} are performed,
             each of the associated {û_i}_{i=1,...,L} is recovered one
             at a time, independently. In many applications the "tasks"
             defined by the mappings v_i → û_i are not statistically
             independent, and it may be possible to improve the
             performance of the inversion if statistical
             interrelationships are exploited. In this paper, we address
             this problem within a multitask learning setting, wherein
             the mapping v_i → û_i for each task corresponds to
             inferring the parameters (here, wavelet coefficients)
             associated with the desired signal u_i, and a shared prior
             is placed across all of the L tasks. Under this
             hierarchical Bayesian modeling, data from all L tasks
             contribute toward inferring a posterior on the
             hyperparameters, and once the shared prior is thereby
             inferred, the data from each of the L individual tasks are
             then employed to estimate the task-dependent wavelet
             coefficients. An empirical Bayesian procedure for the
             estimation of hyperparameters is considered; two fast
             inference algorithms extending the relevance vector machine
             (RVM) are developed. Example results on several data sets
             demonstrate the effectiveness and robustness of the
             proposed algorithms. © 2008 IEEE.},
   Doi = {10.1109/TSP.2008.2005866},
   Key = {fds258028}
}
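
% The inverse problem v → û above is commonly solved via ℓ1 regularization.
% Below is a minimal single-task sketch using iterative soft-thresholding
% (ISTA); it is illustrative only, does not reproduce the paper's
% hierarchical Bayesian/RVM machinery, and all names and parameter values
% are assumptions.

import numpy as np

def ista(Phi, v, lam=0.05, n_iter=500):
    # Minimize 0.5*||v - Phi u||_2^2 + lam*||u||_1 by iterative soft-thresholding.
    L = np.linalg.norm(Phi, 2) ** 2        # Lipschitz constant of the quadratic term
    u = np.zeros(Phi.shape[1])
    for _ in range(n_iter):
        g = u + Phi.T @ (v - Phi @ u) / L  # gradient step on the quadratic term
        u = np.sign(g) * np.maximum(np.abs(g) - lam / L, 0.0)  # soft-threshold
    return u

# Illustrative use: N = 60 compressive measurements of an M = 256, 8-sparse signal.
rng = np.random.default_rng(0)
M, N, k = 256, 60, 8
u_true = np.zeros(M)
u_true[rng.choice(M, size=k, replace=False)] = rng.normal(size=k)
Phi = rng.normal(size=(N, M)) / np.sqrt(N)
u_hat = ista(Phi, Phi @ u_true)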

@article{fds258051,
   Author = {Bigelow, JL and Dunson, DB},
   Title = {Bayesian semiparametric joint models for functional
             predictors},
   Journal = {Journal of the American Statistical Association},
   Volume = {104},
   Number = {485},
   Pages = {26-36},
   Publisher = {Informa UK Limited},
   Year = {2009},
   Month = {March},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/jasa.2009.0001},
   Abstract = {Motivated by the need to understand and predict early
             pregnancy loss using hormonal indicators of pregnancy
             health, this article proposes a semiparametric Bayesian
             approach for assessing the relationship between functional
             predictors and a response. A multivariate adaptive spline
             model is used to describe the functional predictors, and a
             generalized linear model with a random intercept describes
             the response. Through specifying the random intercept to
             follow a Dirichlet process jointly with the random spline
             coefficients, we obtain a procedure that clusters
             trajectories according to shape and according to the
             parameters of the response model for each cluster. This very
             flexible method allows for the incorporation of covariates
             in the models for both the response and the trajectory. We
             apply the method to postovulatory progesterone data from the
             Early Pregnancy Study and find that the model successfully
             predicts early pregnancy loss. © 2009 American Statistical
             Association.},
   Doi = {10.1198/jasa.2009.0001},
   Key = {fds258051}
}

@article{fds257954,
   Author = {Dunson, DB},
   Title = {Bayesian nonparametric hierarchical modeling.},
   Journal = {Biometrical Journal. Biometrische Zeitschrift},
   Volume = {51},
   Number = {2},
   Pages = {273-284},
   Year = {2009},
   Month = {April},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/19358217},
   Abstract = {In biomedical research, hierarchical models are very widely
             used to accommodate dependence in multivariate and
             longitudinal data and for borrowing of information across
             data from different sources. A primary concern in
             hierarchical modeling is sensitivity to parametric
             assumptions, such as linearity and normality of the random
             effects. Parametric assumptions on latent variable
             distributions can be challenging to check and are typically
             unwarranted, given available prior knowledge. This article
             reviews some recent developments in Bayesian nonparametric
             methods motivated by complex, multivariate and functional
             data collected in biomedical studies. The author provides a
             brief review of flexible parametric approaches relying on
             finite mixtures and latent class modeling. Dirichlet process
             mixture models are motivated by the need to generalize these
             approaches to avoid assuming a fixed finite number of
             classes. Focusing on an epidemiology application, the author
             illustrates the practical utility and potential of
             nonparametric Bayes methods.},
   Doi = {10.1002/bimj.200800183},
   Key = {fds257954}
}

@article{fds258056,
   Author = {MacLehose, RF and Dunson, DB},
   Title = {Nonparametric Bayes kernel-based priors for functional data
             analysis},
   Journal = {Statistica Sinica},
   Volume = {19},
   Number = {2},
   Pages = {611-629},
   Year = {2009},
   Month = {April},
   ISSN = {1017-0405},
   Abstract = {We focus on developing nonparametric Bayes methods for
             collections of dependent random functions, allowing
             individual curves to vary flexibly while adaptively
             borrowing information. A prior is proposed, which is
             expressed as a hierarchical mixture of weighted kernels
             placed at unknown locations. The induced prior for any
             individual function is shown to fall within a reproducing
             kernel Hilbert space. We allow flexible borrowing of
             information through the use of a hierarchical Dirichlet
             process prior for the random locations, along with a
             functional Dirichlet process for the weights. Theoretical
             properties are considered and an efficient MCMC algorithm is
             developed, relying on stick-breaking truncations. The
             methods are illustrated using simulation examples and an
             application to reproductive hormone data.},
   Key = {fds258056}
}

@article{fds258048,
   Author = {Ghosh, J and Dunson, DB},
   Title = {Default Prior Distributions and Efficient Posterior
             Computation in Bayesian Factor Analysis.},
   Journal = {Journal of Computational and Graphical Statistics : a Joint
             Publication of American Statistical Association, Institute
             of Mathematical Statistics, Interface Foundation of North
             America},
   Volume = {18},
   Number = {2},
   Pages = {306-320},
   Year = {2009},
   Month = {June},
   ISSN = {1061-8600},
   url = {http://dx.doi.org/10.1198/jcgs.2009.07145},
   Abstract = {Factor analytic models are widely used in social sciences.
             These models have also proven useful for sparse modeling of
             the covariance structure in multidimensional data. Normal
             prior distributions for factor loadings and inverse gamma
             prior distributions for residual variances are a popular
             choice because of their conditionally conjugate form.
             However, such prior distributions require elicitation of
             many hyperparameters and tend to result in poorly behaved
             Gibbs samplers. In addition, one must choose an informative
             specification, as high variance prior distributions face
             problems due to impropriety of the posterior distribution.
             This article proposes a default, heavy-tailed prior
             distribution specification, which is induced through
             parameter expansion while facilitating efficient posterior
             computation. We also develop an approach to allow
             uncertainty in the number of factors. The methods are
             illustrated through simulated examples and epidemiology and
             toxicology applications. Data sets and computer code used in
             this article are available online.},
   Doi = {10.1198/jcgs.2009.07145},
   Key = {fds258048}
}
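
% The underlying factor model, in standard notation (not specific to this
% article's parameter-expanded prior):
%
%   y_i = \Lambda\,\eta_i + \epsilon_i, \qquad \eta_i \sim N_k(0, I_k), \qquad
%   \epsilon_i \sim N_p(0, \Sigma), \quad \Sigma = \mathrm{diag}(\sigma_1^2,\ldots,\sigma_p^2),
%
% so that marginally \mathrm{Cov}(y_i) = \Lambda\Lambda' + \Sigma; the
% article's heavy-tailed default prior on \Lambda is induced by expanding
% this parameterization with redundant working parameters.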

@article{fds257955,
   Author = {Baird, DD and Travlos, G and Wilson, R and Dunson, DB and Hill, MC and D'Aloisio, AA and London, SJ and Schectman, JM},
   Title = {Uterine leiomyomata in relation to insulin-like growth
             factor-I, insulin, and diabetes.},
   Journal = {Epidemiology (Cambridge, Mass.)},
   Volume = {20},
   Number = {4},
   Pages = {604-610},
   Year = {2009},
   Month = {July},
   ISSN = {1044-3983},
   url = {http://dx.doi.org/10.1097/EDE.0b013e31819d8d3f},
   Abstract = {Insulin-like growth factor-I (IGF-I) and insulin stimulate
             cell proliferation in uterine leiomyoma (fibroid) tissue. We
             hypothesized that circulating levels of these proteins would
             be associated with increased prevalence and size of uterine
             fibroids. Participants were 35-49-year-old, randomly selected
             members of an urban health plan who were enrolled in the
             study in 1996-1999. Premenopausal participants were screened
             for fibroids with ultrasound. Fasting blood samples were
             collected. Associations between fibroids and diabetes,
             plasma IGF-I, IGF binding protein 3 (BP3), and insulin were
             evaluated for blacks (n = 585) and whites (n = 403) by using
             multiple logistic regression. IGF-I showed no association
             with fibroids in blacks, but in whites the adjusted odds
             ratios (aORs) for both mid and upper tertiles compared with
             the lowest tertile were 0.6 (95% confidence intervals [CI] =
             0.3-1.0 and 0.4-1.1, respectively). Insulin and diabetes
             both tended to be inversely associated with fibroids in
             blacks. The insulin association was with large fibroids; aOR
             for the upper insulin tertile relative to the lowest was 0.4
             (0.2-0.9). The aOR for diabetes was 0.5 (0.2-1.0).
             Associations of insulin and diabetes with fibroids were weak
             for whites. Binding protein 3 showed no association with
             fibroids. Contrary to our hypothesis, high circulating IGF-I
             and insulin were not related to increased fibroid
             prevalence. Instead, there was suggestion of the opposite.
             The inverse association with diabetes, although based on
             small numbers, is consistent with previously reported
             findings. Future studies might investigate vascular
             dysfunction as a mediator between hyperinsulinemia or
             diabetes and possible reduced risk of fibroids.},
   Doi = {10.1097/EDE.0b013e31819d8d3f},
   Key = {fds257955}
}

@article{fds258049,
   Author = {Scarpa, B and Dunson, DB},
   Title = {Bayesian hierarchical functional data analysis via
             contaminated informative priors.},
   Journal = {Biometrics},
   Volume = {65},
   Number = {3},
   Pages = {772-780},
   Year = {2009},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2008.01163.x},
   Abstract = {A variety of flexible approaches have been proposed for
             functional data analysis, allowing both the mean curve and
             the distribution about the mean to be unknown. Such methods
             are most useful when there is limited prior information.
             Motivated by applications to modeling of temperature curves
             in the menstrual cycle, this article proposes a flexible
             approach for incorporating prior information in
             semiparametric Bayesian analyses of hierarchical functional
             data. The proposed approach is based on specifying the
             distribution of functions as a mixture of a parametric
             hierarchical model and a nonparametric contamination. The
             parametric component is chosen based on prior knowledge,
             while the contamination is characterized as a functional
             Dirichlet process. In the motivating application, the
             contamination component allows unanticipated curve shapes in
             unhealthy menstrual cycles. Methods are developed for
             posterior computation, and the approach is applied to data
             from a European fecundability study.},
   Doi = {10.1111/j.1541-0420.2008.01163.x},
   Key = {fds258049}
}

@article{fds257984,
   Author = {Ren, L and Dunson, DB and Lindroth, S and Carin, L},
   Title = {Music analysis with a Bayesian dynamic model},
   Journal = {2009 Ieee International Conference on Acoustics, Speech, and
             Signal Processing (Icassp)},
   Pages = {1681-1684},
   Publisher = {IEEE},
   Year = {2009},
   Month = {September},
   ISSN = {1520-6149},
   url = {http://dx.doi.org/10.1109/ICASSP.2009.4959925},
   Abstract = {A Bayesian dynamic model is developed to model complex
             sequential data, with a focus on audio signals from music.
             The music is represented in terms of a sequence of discrete
             observations, and the sequence is modeled using a hidden
             Markov model (HMM) with time-evolving parameters. The model
             imposes the belief that observations that are temporally
             proximate are more likely to be drawn from HMMs with similar
             parameters, while also allowing for "innovation" associated
             with abrupt changes in the music texture. Segmentation of a
             given musical piece is obtained via the model inference,
             and the results are compared with those of other models and with a
             conventional music-theoretic analysis. ©2009
             IEEE.},
   Doi = {10.1109/ICASSP.2009.4959925},
   Key = {fds257984}
}

@article{fds257985,
   Author = {Wang, C and An, Q and Carin, L and Dunson, DB},
   Title = {Multi-task classification with infinite local
             experts},
   Journal = {2009 Ieee International Conference on Acoustics, Speech, and
             Signal Processing (Icassp)},
   Pages = {1569-1572},
   Publisher = {IEEE},
   Year = {2009},
   Month = {September},
   ISSN = {1520-6149},
   url = {http://dx.doi.org/10.1109/ICASSP.2009.4959897},
   Abstract = {We propose a multi-task learning (MTL) framework for
             nonlinear classification, based on an infinite set of local
             experts in feature space. The usage of local experts enables
             sharing at the expert-level, encouraging the borrowing of
             information even if tasks are similar only in subregions of
             feature space. A kernel stick-breaking process (KSBP) prior
             is imposed on the underlying distribution of class labels,
             so that the number of experts is inferred in the posterior
             and thus model selection issues are avoided. The MTL is
             implemented by imposing a Dirichlet process (DP) prior on a
             layer above the task-dependent KSBPs. ©2009
             IEEE.},
   Doi = {10.1109/ICASSP.2009.4959897},
   Key = {fds257985}
}

@article{fds258024,
   Author = {Dunson, DB},
   Title = {Comment on article by Craigmile et al.},
   Journal = {Bayesian Analysis},
   Volume = {4},
   Number = {1},
   Pages = {41-44},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2009},
   Month = {December},
   ISSN = {1936-0975},
   url = {http://dx.doi.org/10.1214/09-BA401B},
   Doi = {10.1214/09-BA401B},
   Key = {fds258024}
}

@article{fds258044,
   Author = {Chung, Y and Dunson, DB},
   Title = {Nonparametric Bayes Conditional Distribution Modeling With
             Variable Selection.},
   Journal = {Journal of the American Statistical Association},
   Volume = {104},
   Number = {488},
   Pages = {1646-1660},
   Year = {2009},
   Month = {December},
   ISSN = {0162-1459},
   url = {http://hdl.handle.net/10161/4398},
   Abstract = {This article considers a methodology for flexibly
             characterizing the relationship between a response and
             multiple predictors. Goals are (1) to estimate the
             conditional response distribution addressing the
             distributional changes across the predictor space, and (2)
             to identify important predictors for the response
             distribution change both within local regions and globally.
             We first introduce the probit stick-breaking process (PSBP)
             as a prior for an uncountable collection of
             predictor-dependent random distributions and propose a PSBP
             mixture (PSBPM) of normal regressions for modeling the
             conditional distributions. A global variable selection
             structure is incorporated to discard unimportant predictors,
             while allowing estimation of posterior inclusion
             probabilities. Local variable selection is conducted relying
             on the conditional distribution estimates at different
             predictor points. An efficient stochastic search sampling
             algorithm is proposed for posterior computation. The methods
             are illustrated through simulation and applied to an
             epidemiologic study.},
   Doi = {10.1198/jasa.2009.tm08302},
   Key = {fds258044}
}
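
% As a minimal sketch of the probit stick-breaking construction described in
% the abstract above (not code from the paper): the weights at a single
% predictor value, assuming linear stick functions alpha_h(x) = mu_h + x*beta_h
% and a finite truncation level H, all chosen purely for illustration.
import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(0)
H = 20                                   # truncation level (assumption)
mu = rng.normal(size=H)                  # stick intercepts
beta = rng.normal(size=H)                # predictor effects on the sticks

def psbp_weights(x):
    """pi_h(x) = Phi(alpha_h(x)) * prod_{l<h} (1 - Phi(alpha_l(x)))."""
    v = norm.cdf(mu + x * beta)          # probit-transformed sticks in (0, 1)
    remaining = np.concatenate(([1.0], np.cumprod(1.0 - v)[:-1]))
    return v * remaining

print(psbp_weights(0.5).sum())           # approaches 1 as H grows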

@article{fds257956,
   Author = {Mitra, R and Dunson, D},
   Title = {Two-level stochastic search variable selection in GLMs with
             missing predictors.},
   Journal = {The International Journal of Biostatistics},
   Volume = {6},
   Number = {1},
   Pages = {Article-33},
   Year = {2010},
   Month = {January},
   ISSN = {1557-4679},
   url = {http://dx.doi.org/10.2202/1557-4679.1173},
   Abstract = {Stochastic search variable selection (SSVS) algorithms
             provide an appealing and widely used approach for searching
             for good subsets of predictors while simultaneously
             estimating posterior model probabilities and model-averaged
             predictive distributions. This article proposes a two-level
             generalization of SSVS to account for missing predictors
             while accommodating uncertainty in the relationships between
             these predictors. Bayesian approaches for allowing
             predictors that are missing at random require a model on the
             joint distribution of the predictors. We show that
             predictive performance can be improved by allowing
             uncertainty in the specification of predictor relationships
             in this model. The methods are illustrated through
             simulation studies and analysis of an epidemiologic data
             set.},
   Doi = {10.2202/1557-4679.1173},
   Key = {fds257956}
}
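
% As a minimal sketch of the spike-and-slab calculation underlying standard
% (single-level) SSVS, not the paper's two-level algorithm with missing
% predictors: the exact posterior inclusion probability of one coefficient
% under a unit-norm predictor and known noise variance; all constants below
% are illustrative assumptions.
import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(1)
n, sigma2, v1, q = 200, 1.0, 1.0, 0.2    # noise var, slab var, P(gamma_j = 1)
x = rng.normal(size=n)
x /= np.linalg.norm(x)                   # normalize so x'x = 1
y = 2.5 * x + rng.normal(scale=np.sqrt(sigma2), size=n)

z = x @ y                                # z ~ N(beta_j, sigma2) given beta_j
m1 = norm.pdf(z, 0.0, np.sqrt(sigma2 + v1))   # marginal of z under the slab
m0 = norm.pdf(z, 0.0, np.sqrt(sigma2))        # marginal of z under the spike
print(q * m1 / (q * m1 + (1 - q) * m0))       # posterior inclusion probability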

@article{fds257991,
   Author = {Wang, C and Liao, X and Carin, L and Dunson, DB},
   Title = {Classification with Incomplete Data Using Dirichlet Process
             Priors.},
   Journal = {Journal of Machine Learning Research},
   Volume = {11},
   Pages = {3269-3311},
   Year = {2010},
   Month = {March},
   ISSN = {1532-4435},
   Abstract = {A non-parametric hierarchical Bayesian framework is
             developed for designing a classifier, based on a mixture of
             simple (linear) classifiers. Each simple classifier is
             termed a local "expert", and the number of experts and their
             construction are manifested via a Dirichlet process
             formulation. The simple form of the "experts" allows
             analytical handling of incomplete data. The model is
             extended to allow simultaneous design of classifiers on
             multiple data sets, termed multi-task learning, with this
             also performed non-parametrically via the Dirichlet process.
             Fast inference is performed using variational Bayesian (VB)
             analysis, and example results are presented for several data
             sets. We also perform inference via Gibbs sampling, to which
             we compare the VB results.},
   Key = {fds257991}
}

@article{fds258037,
   Author = {Rodríguez, A and Dunson, DB and Gelfand, AE},
   Title = {Latent Stick-Breaking Processes.},
   Journal = {Journal of the American Statistical Association},
   Volume = {105},
   Number = {490},
   Pages = {647-659},
   Year = {2010},
   Month = {April},
   ISSN = {0162-1459},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23559690},
   Abstract = {We develop a model for stochastic processes with random
             marginal distributions. Our model relies on a stick-breaking
             construction for the marginal distribution of the process,
             and introduces dependence across locations by using a latent
             Gaussian copula model as the mechanism for selecting the
             atoms. The resulting latent stick-breaking process (LaSBP)
             induces a random partition of the index space, with points
             closer in space having a higher probability of being in the
             same cluster. We develop an efficient and straightforward
             Markov chain Monte Carlo (MCMC) algorithm for computation
             and discuss applications in financial econometrics and
             ecology. This article has supplementary material
             online.},
   Doi = {10.1198/jasa.2010.tm08241},
   Key = {fds258037}
}

@article{fds258039,
   Author = {Wang, L and Dunson, DB},
   Title = {Semiparametric bayes multiple testing: applications to tumor
             data.},
   Journal = {Biometrics},
   Volume = {66},
   Number = {2},
   Pages = {493-501},
   Year = {2010},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2009.01301.x},
   Abstract = {In National Toxicology Program (NTP) studies, investigators
             want to assess whether a test agent is carcinogenic overall
             and specific to certain tumor types, while estimating the
             dose-response profiles. Because there are potentially
             correlations among the tumors, a joint inference is
             preferred to separate univariate analyses for each tumor
             type. In this regard, we propose a random effect logistic
             model with a matrix of coefficients representing log-odds
             ratios for the adjacent dose groups for tumors at different
             sites. We propose appropriate nonparametric priors for these
             coefficients to characterize the correlations and to allow
             borrowing of information across different dose groups and
             tumor types. Global and local hypotheses can be easily
             evaluated by summarizing the output of a single Monte Carlo
             Markov chain (MCMC). Two multiple testing procedures are
             applied for testing local hypotheses based on the posterior
             probabilities of local alternatives. Simulation studies are
             conducted and an NTP tumor data set is analyzed illustrating
             the proposed approach.},
   Doi = {10.1111/j.1541-0420.2009.01301.x},
   Key = {fds258039}
}

@article{fds258041,
   Author = {Maclehose, RF and Dunson, DB},
   Title = {Bayesian semiparametric multiple shrinkage.},
   Journal = {Biometrics},
   Volume = {66},
   Number = {2},
   Pages = {455-462},
   Year = {2010},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2009.01275.x},
   Abstract = {High-dimensional and highly correlated data leading to non-
             or weakly identified effects are commonplace. Maximum
             likelihood will typically fail in such situations and a
             variety of shrinkage methods have been proposed. Standard
             techniques, such as ridge regression or the lasso, shrink
             estimates toward zero, with some approaches allowing
             coefficients to be selected out of the model by achieving a
             value of zero. When substantive information is available,
             estimates can be shrunk to nonnull values; however, such
             information may not be available. We propose a Bayesian
             semiparametric approach that allows shrinkage to multiple
             locations. Coefficients are given a mixture of heavy-tailed
             double exponential priors, with location and scale
             parameters assigned Dirichlet process hyperpriors to allow
             groups of coefficients to be shrunk toward the same,
             possibly nonzero, mean. Our approach favors sparse, but
             flexible, structure by shrinking toward a small number of
             random locations. The methods are illustrated using a study
             of genetic polymorphisms and Parkinson's
             disease.},
   Doi = {10.1111/j.1541-0420.2009.01275.x},
   Key = {fds258041}
}

@article{fds258042,
   Author = {Ren, L and Dunson, D and Lindroth, S and Carin, L},
   Title = {Dynamic nonparametric bayesian models for analysis of
             music},
   Journal = {Journal of the American Statistical Association},
   Volume = {105},
   Number = {490},
   Pages = {458-472},
   Publisher = {Informa UK Limited},
   Year = {2010},
   Month = {June},
   ISSN = {0162-1459},
   url = {http://hdl.handle.net/10161/4397},
   Abstract = {The dynamic hierarchical Dirichlet process (dHDP) is
             developed to model complex sequential data, with a focus on
             audio signals from music. The music is represented in terms
             of a sequence of discrete observations, and the sequence is
             modeled using a hidden Markov model (HMM) with time-evolving
             parameters. The dHDP imposes the belief that observations
             that are temporally proximate are more likely to be drawn
             from HMMs with similar parameters, while also allowing for
             "innovation" associated with abrupt changes in the music
             texture. The sharing mechanisms of the time-evolving model
             are derived, and for inference a relatively simple Markov
             chain Monte Carlo sampler is developed. Segmentation of a
             given musical piece is constituted via the model inference.
             Detailed examples are presented on several pieces, with
             comparisons to other models. The dHDP results are also
             compared with a conventional music-theoretic analysis. All
             the supplemental materials used by this paper are available
             online. © 2010 American Statistical Association.},
   Doi = {10.1198/jasa.2009.ap08497},
   Key = {fds258042}
}

@article{fds258032,
   Author = {Bornkamp, B and Ickstadt, K and Dunson, D},
   Title = {Stochastically ordered multiple regression.},
   Journal = {Biostatistics (Oxford, England)},
   Volume = {11},
   Number = {3},
   Pages = {419-431},
   Year = {2010},
   Month = {July},
   ISSN = {1465-4644},
   url = {http://dx.doi.org/10.1093/biostatistics/kxq001},
   Abstract = {In various application areas, prior information is available
             about the direction of the effects of multiple predictors on
             the conditional response distribution. For example, in
             epidemiology studies of potentially adverse exposures and
             continuous health responses, one can typically assume a
             priori that increasing the level of an exposure does not
             lead to an improvement in the health response. Such an
             assumption can be formalized through a stochastic ordering
             assumption in each of the exposures, leading to a
             potentially large improvement in efficiency in nonparametric
             modeling of the conditional response distribution. This
             article proposes a Bayesian nonparametric approach to this
             problem based on characterizing the conditional response
             density as a Gaussian mixture, with the locations of the
             Gaussian means varying flexibly with predictors subject to
             minimal constraints to ensure stochastic ordering.
             Theoretical properties are considered and Markov chain Monte
             Carlo methods are developed for posterior computation. The
             methods are illustrated using simulation examples and a
             reproductive epidemiology application.},
   Doi = {10.1093/biostatistics/kxq001},
   Key = {fds258032}
}

@article{fds258043,
   Author = {Park, JH and Dunson, DB},
   Title = {Bayesian generalized product partition model},
   Journal = {Statistica Sinica},
   Volume = {20},
   Number = {3},
   Pages = {1203-1226},
   Year = {2010},
   Month = {July},
   ISSN = {1017-0405},
   url = {http://hdl.handle.net/10161/4623},
   Abstract = {Starting with a carefully formulated Dirichlet process (DP)
             mixture model, we derive a generalized product partition
             model (GPPM) in which the partition process is
             predictor-dependent. The GPPM generalizes DP clustering to
             relax the exchangeability assumption through the
             incorporation of predictors, resulting in a generalized
             Pólya urn scheme. In addition, the GPPM can be used for
             formulating flexible semiparametric Bayes models for
             conditional distribution estimation, bypassing the need for
             expensive computation of large numbers of unknowns
             characterizing priors for dependent collections of random
             probability measures. A variety of special cases are
             considered, and an efficient Gibbs sampling algorithm is
             developed for posterior computation. The methods are
             illustrated using simulation examples and an epidemiologic
             application.},
   Key = {fds258043}
}
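
% As a minimal sketch of the exchangeable special case that the GPPM above
% generalizes (not the predictor-dependent urn itself): cluster assignments
% from the Dirichlet process Polya urn, i.e., the Chinese restaurant process;
% alpha and n are illustrative values.
import numpy as np

rng = np.random.default_rng(2)
alpha, n = 1.0, 50                       # DP concentration, sample size
assignments = [0]                        # first subject opens cluster 0
for i in range(1, n):
    counts = np.bincount(assignments)    # occupancy of existing clusters
    probs = np.append(counts, alpha) / (i + alpha)   # old clusters, new cluster
    assignments.append(int(rng.choice(len(probs), p=probs)))
print(max(assignments) + 1, "clusters")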

@article{fds257958,
   Author = {Stanford, JB and Mikolajczyk, RT and Dunson, DB},
   Title = {Are Chinese people really more fertile?},
   Journal = {Fertility and Sterility},
   Volume = {94},
   Number = {3},
   Pages = {e58},
   Year = {2010},
   Month = {August},
   ISSN = {0015-0282},
   url = {http://dx.doi.org/10.1016/j.fertnstert.2010.05.004},
   Doi = {10.1016/j.fertnstert.2010.05.004},
   Key = {fds257958}
}

@article{fds257959,
   Author = {Yang, M and Dunson, DB and Baird, D},
   Title = {Semiparametric Bayes hierarchical models with mean and
             variance constraints.},
   Journal = {Computational Statistics & Data Analysis},
   Volume = {54},
   Number = {9},
   Pages = {2172-2186},
   Year = {2010},
   Month = {September},
   ISSN = {0167-9473},
   url = {http://dx.doi.org/10.1016/j.csda.2010.03.025},
   Abstract = {In parametric hierarchical models, it is standard practice
             to place mean and variance constraints on the latent
             variable distributions for the sake of identifiability and
             interpretability. Because incorporation of such constraints
             is challenging in semiparametric models that allow latent
             variable distributions to be unknown, previous methods
             either constrain the median or avoid constraints. In this
             article, we propose a centered stick-breaking process
             (CSBP), which induces mean and variance constraints on an
             unknown distribution in a hierarchical model. This is
             accomplished by viewing an unconstrained stick-breaking
             process as a parameter-expanded version of a CSBP. An
             efficient blocked Gibbs sampler is developed for approximate
             posterior computation. The methods are illustrated through a
             simulated example and an epidemiologic application.},
   Doi = {10.1016/j.csda.2010.03.025},
   Key = {fds257959}
}
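
% As a minimal sketch of the centering idea described above: draw a truncated
% stick-breaking random measure, then standardize its atoms so the measure has
% mean 0 and variance 1; the truncation level and N(0, 1) base measure are
% assumptions, not the paper's settings.
import numpy as np

rng = np.random.default_rng(3)
H, alpha = 50, 1.0
v = rng.beta(1.0, alpha, size=H)         # stick-breaking ratios
w = v * np.concatenate(([1.0], np.cumprod(1.0 - v)[:-1]))
w /= w.sum()                             # renormalize the truncated weights
theta = rng.normal(size=H)               # atoms from the base measure

m = w @ theta                            # mean of the discrete measure
s = np.sqrt(w @ (theta - m) ** 2)        # its standard deviation
theta_c = (theta - m) / s                # centered and scaled atoms
print(w @ theta_c, w @ theta_c ** 2)     # ~0 and ~1 by construction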

@article{fds258034,
   Author = {Cai, B and Dunson, DB and Stanford, JB},
   Title = {Dynamic model for multivariate markers of
             fecundability.},
   Journal = {Biometrics},
   Volume = {66},
   Number = {3},
   Pages = {905-913},
   Year = {2010},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2009.01327.x},
   Abstract = {Dynamic latent class models provide a flexible framework for
             studying biologic processes that evolve over time. Motivated
             by studies of markers of the fertile days of the menstrual
             cycle, we propose a discrete-time dynamic latent class
             framework, allowing change points to depend on time, fixed
             predictors, and random effects. Observed data consist of
             multivariate categorical indicators, which change
             dynamically in a flexible manner according to latent class
             status. Given the flexibility of the framework, which
             incorporates semi-parametric components using mixtures of
             betas, identifiability constraints are needed to define the
             latent classes. Such constraints are most appropriately
             based on the known biology of the process. The Bayesian
             method is developed particularly for analyzing mucus symptom
             data from a study of women using natural family
             planning.},
   Doi = {10.1111/j.1541-0420.2009.01327.x},
   Key = {fds258034}
}

@article{fds258046,
   Author = {Dunson, DB},
   Title = {Multivariate kernel partition process mixtures.},
   Journal = {Statistica Sinica},
   Volume = {20},
   Number = {4},
   Pages = {1395-1422},
   Year = {2010},
   Month = {October},
   ISSN = {1017-0405},
   Abstract = {Mixtures provide a useful approach for relaxing parametric
             assumptions. Discrete mixture models induce clusters,
             typically with the same cluster allocation for each
             parameter in multivariate cases. As a more flexible approach
             that facilitates sparse nonparametric modeling of
             multivariate random effects distributions, this article
             proposes a kernel partition process (KPP) in which the
             cluster allocation varies for different parameters. The KPP
             is shown to be the driving measure for a multivariate
             ordered Chinese restaurant process that induces a
             highly-flexible dependence structure in local clustering.
             This structure allows the relative locations of the random
             effects to inform the clustering process, with
             spatially-proximal random effects likely to be assigned the
             same cluster index. An exact block Gibbs sampler is
             developed for posterior computation, avoiding truncation of
             the infinite measure. The methods are applied to hormone
             curve data, and a dependent KPP is proposed for
             classification from functional predictors.},
   Key = {fds258046}
}

@article{fds257988,
   Author = {Blei, D and Carin, L and Dunson, D},
   Title = {Probabilistic Topic Models: A focus on graphical model
             design and applications to document and image
             analysis.},
   Journal = {Ieee Signal Processing Magazine},
   Volume = {27},
   Number = {6},
   Pages = {55-65},
   Year = {2010},
   Month = {November},
   ISSN = {1053-5888},
   url = {http://dx.doi.org/10.1109/MSP.2010.938079},
   Abstract = {In this article, we review probabilistic topic models:
             graphical models that can be used to summarize a large
             collection of documents with a smaller number of
             distributions over words. Those distributions are called
             "topics" because, when fit to data, they capture the
             salient themes that run through the collection. We describe
             both finite-dimensional parametric topic models and their
             Bayesian nonparametric counterparts, which are based on the
             hierarchical Dirichlet process (HDP). We discuss two
             extensions of topic models to time-series data: one that
             lets the topics slowly change over time and one that lets
             the assumed prevalence of the topics change. Finally, we
             illustrate the application of topic models to nontext data,
             summarizing some recent research results in image analysis.
             © 2010 IEEE.},
   Doi = {10.1109/MSP.2010.938079},
   Key = {fds257988}
}
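
% As a minimal sketch of the finite (LDA-style) generative process the
% article reviews; vocabulary size, topic count, and the Dirichlet
% hyperparameters below are illustrative choices, and the nonparametric HDP
% version described in the abstract replaces the fixed K with an unbounded
% set of topics.
import numpy as np

rng = np.random.default_rng(4)
V, K, D, N = 1000, 5, 3, 50              # vocab size, topics, docs, words/doc
topics = rng.dirichlet(np.full(V, 0.1), size=K)   # topic-word distributions
docs = []
for d in range(D):
    theta = rng.dirichlet(np.full(K, 0.5))        # document's topic weights
    z = rng.choice(K, size=N, p=theta)            # topic for each word slot
    docs.append([int(rng.choice(V, p=topics[k])) for k in z])
print(docs[0][:5])                       # word ids of the first document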

@article{fds257990,
   Author = {Chen, B and Chen, M and Paisley, J and Zaas, A and Woods, C and Ginsburg,
             GS and Hero, A and Lucas, J and Dunson, D and Carin,
             L},
   Title = {Bayesian inference of the number of factors in
             gene-expression analysis: application to human virus
             challenge studies.},
   Journal = {Bmc Bioinformatics},
   Volume = {11},
   Pages = {552},
   Year = {2010},
   Month = {November},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/21062443},
   Abstract = {BACKGROUND: Nonparametric Bayesian techniques have been
             developed recently to extend the sophistication of factor
             models, allowing one to infer the number of appropriate
             factors from the observed data. We consider such techniques
             for sparse factor analysis, with application to
             gene-expression data from three virus challenge studies.
             Particular attention is placed on employing the Beta Process
             (BP), the Indian Buffet Process (IBP), and related
             sparseness-promoting techniques to infer a proper number of
             factors. The posterior density function on the model
             parameters is computed using Gibbs sampling and variational
             Bayesian (VB) analysis. RESULTS: Time-evolving
             gene-expression data are considered for respiratory
             syncytial virus (RSV), Rhino virus, and influenza, using
             blood samples from healthy human subjects. These data were
             acquired in three challenge studies, each executed after
             receiving institutional review board (IRB) approval from
             Duke University. Comparisons are made between several
             alternative means of performing nonparametric factor
             analysis on these data, with comparisons as well to
             sparse-PCA and Penalized Matrix Decomposition (PMD), closely
             related non-Bayesian approaches. CONCLUSIONS: Applying the
             Beta Process to the factor scores, or to the singular values
             of a pseudo-SVD construction, the proposed algorithms infer
             the number of factors in gene-expression data. For real data
             the "true" number of factors is unknown; in our simulations
             we consider a range of noise variances, and the proposed
             Bayesian models inferred the number of factors accurately
             relative to other methods in the literature, such as
             sparse-PCA and PMD. We have also identified a "pan-viral"
             factor of importance for each of the three viruses
             considered in this study. We have identified a set of genes
             associated with this pan-viral factor, of interest for early
             detection of such viruses based upon the host response, as
             quantified via gene-expression data.},
   Doi = {10.1186/1471-2105-11-552},
   Key = {fds257990}
}

@article{fds257963,
   Author = {Gordon, GJ and Dunson, D},
   Title = {Preface to the Proceedings of AISTATS 2011},
   Journal = {Journal of Machine Learning Research},
   Volume = {9},
   Pages = {1-2},
   Year = {2010},
   Month = {December},
   ISSN = {1532-4435},
   Key = {fds257963}
}

@article{fds257987,
   Author = {Wang, E and Liu, D and Silva, J and Dunson, D and Carin,
             L},
   Title = {Joint analysis of time-evolving binary matrices and
             associated documents},
   Journal = {Advances in Neural Information Processing Systems 23: 24th
             Annual Conference on Neural Information Processing Systems
             2010, Nips 2010},
   Year = {2010},
   Month = {December},
   Abstract = {We consider problems for which one has incomplete binary
             matrices that evolve with time (e.g., the votes of
             legislators on particular legislation, with each year
             characterized by a different such matrix). An objective of
             such analysis is to infer structure and inter-relationships
             underlying the matrices, here defined by latent features
             associated with each axis of the matrix. In addition, it is
             assumed that documents are available for the entities
             associated with at least one of the matrix axes. By jointly
             analyzing the matrices and documents, one may be used to
             inform the other within the analysis, and the model offers
             the opportunity to predict matrix values (e.g., votes) based
             only on an associated document (e.g., legislation). The
             research presented here merges two areas of machine-learning
             that have previously been investigated separately:
             incomplete-matrix analysis and topic modeling. The analysis
             is performed from a Bayesian perspective, with efficient
             inference constituted via Gibbs sampling. The framework is
             demonstrated by considering all voting data and available
             documents (legislation) during the 220-year lifetime of the
             United States Senate and House of Representatives.},
   Key = {fds257987}
}

@article{fds257989,
   Author = {Chen, M and Silva, J and Paisley, J and Wang, C and Dunson, D and Carin,
             L},
   Title = {Compressive Sensing on Manifolds Using a Nonparametric
             Mixture of Factor Analyzers: Algorithm and Performance
             Bounds.},
   Journal = {Ieee Transactions on Signal Processing},
   Volume = {58},
   Number = {12},
   Pages = {6140-6155},
   Year = {2010},
   Month = {December},
   ISSN = {1053-587X},
   url = {http://dx.doi.org/10.1109/TSP.2010.2070796},
   Abstract = {Nonparametric Bayesian methods are employed to constitute a
             mixture of low-rank Gaussians, for data x ∈ ℝ^N that
             are of high dimension N but are constrained to reside in a
             low-dimensional subregion of ℝ^N. The number of mixture
             components and their rank are inferred automatically from
             the data. The resulting algorithm can be used for learning
             manifolds and for reconstructing signals from manifolds,
             based on compressive sensing (CS) projection measurements.
             The statistical CS inversion is performed analytically. We
             derive the required number of CS random measurements needed
             for successful reconstruction, based on easily-computed
             quantities, drawing on block-sparsity properties. The
             proposed methodology is validated on several synthetic and
             real datasets.},
   Doi = {10.1109/TSP.2010.2070796},
   Key = {fds257989}
}

@article{fds258036,
   Author = {Bhattacharya, A and Dunson, DB},
   Title = {Nonparametric Bayesian density estimation on manifolds with
             applications to planar shapes.},
   Journal = {Biometrika},
   Volume = {97},
   Number = {4},
   Pages = {851-865},
   Year = {2010},
   Month = {December},
   ISSN = {0006-3444},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/22822255},
   Abstract = {Statistical analysis on landmark-based shape spaces has
             diverse applications in morphometrics, medical diagnostics,
             machine vision and other areas. These shape spaces are
             non-Euclidean quotient manifolds. To conduct nonparametric
             inferences, one may define notions of centre and spread on
             this manifold and work with their estimates. However, it is
             useful to consider full likelihood-based methods, which
             allow nonparametric estimation of the probability density.
             This article proposes a broad class of mixture models
             constructed using suitable kernels on a general compact
             metric space and then on the planar shape space in
             particular. Following a Bayesian approach with a
             nonparametric prior on the mixing distribution, conditions
             are obtained under which the Kullback-Leibler property
             holds, implying large support and weak posterior
             consistency. Gibbs sampling methods are developed for
             posterior computation, and the methods are applied to
             problems in density estimation and classification with
             shape-based predictors. Simulation studies show improved
             estimation performance relative to existing
             approaches.},
   Doi = {10.1093/biomet/asq044},
   Key = {fds258036}
}

@article{fds258060,
   Author = {Yang, M and Dunson, DB},
   Title = {Bayesian semiparametric structural equation models with
             latent variables},
   Journal = {Psychometrika},
   Volume = {75},
   Number = {4},
   Pages = {675-693},
   Publisher = {Springer Nature},
   Year = {2010},
   Month = {December},
   ISSN = {0033-3123},
   url = {http://dx.doi.org/10.1007/s11336-010-9174-4},
   Abstract = {Structural equation models (SEMs) with latent variables are
             widely useful for sparse covariance structure modeling and
             for inferring relationships among latent variables. Bayesian
             SEMs are appealing in allowing for the incorporation of
             prior information and in providing exact posterior
             distributions of unknowns, including the latent variables.
             In this article, we propose a broad class of semiparametric
             Bayesian SEMs, which allow mixed categorical and continuous
             manifest variables while also allowing the latent variables
             to have unknown distributions. In order to include typical
             identifiability restrictions on the latent variable
             distributions, we rely on centered Dirichlet process (CDP)
             and CDP mixture (CDPM) models. The CDP will induce a latent
             class model with an unknown number of classes, while the
             CDPM will induce a latent trait model with unknown densities
             for the latent traits. A simple and efficient Markov chain
             Monte Carlo algorithm is developed for posterior
             computation, and the methods are illustrated using simulated
             examples, and several applications. © 2010 The Psychometric
             Society.},
   Doi = {10.1007/s11336-010-9174-4},
   Key = {fds258060}
}

@article{fds257986,
   Author = {Zhou, M and Wang, C and Chen, M and Paisley, J and Dunson, D and Carin,
             L},
   Title = {Nonparametric bayesian matrix completion},
   Journal = {2010 Ieee Sensor Array and Multichannel Signal Processing
             Workshop, Sam 2010},
   Pages = {213-216},
   Publisher = {IEEE},
   Year = {2010},
   Month = {December},
   url = {http://dx.doi.org/10.1109/SAM.2010.5606741},
   Abstract = {The Beta-Binomial processes are considered for inferring
             missing values in matrices. The model moves beyond the
             low-rank assumption, modeling the matrix columns as residing
             in a nonlinear subspace. Large-scale problems are considered
             via efficient Gibbs sampling, yielding predictions as well
             as a measure of confidence in each prediction. Algorithm
             performance is considered for several datasets, with
             encouraging performance relative to existing approaches. ©
             2010 IEEE.},
   Doi = {10.1109/SAM.2010.5606741},
   Key = {fds257986}
}

@article{fds257978,
   Author = {Armagan, A and Dunson, DB and Clyde, MA},
   Title = {Generalized Beta Mixtures of Gaussians},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {24},
   Pages = {523-531},
   Publisher = {Neural Information Processing Systems Foundation,
             Inc},
   Editor = {Shawe-Taylor, J and Zemel, RS and Bartlett, PL},
   Year = {2011},
   Abstract = {In recent years, a rich variety of shrinkage priors have
             been proposed that have great promise in addressing massive
             regression problems. In general, these new priors can be
             expressed as scale mixtures of normals, but have more
             complex forms and better properties than traditional Cauchy
             and double exponential priors. We first propose a new class
             of normal scale mixtures through a novel generalized beta
             distribution that encompasses many interesting priors as
             special cases. This encompassing framework should prove
             useful in comparing competing priors, considering properties
             and revealing close connections. We then develop a class of
             variational Bayes approximations through the new hierarchy
             presented that will scale more efficiently to the types of
             truly massive data sets that are now encountered
             routinely.},
   Key = {fds257978}
}
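
% As a minimal sketch of drawing coefficients from a normal scale mixture in
% this spirit, using a gamma-gamma hierarchy on the scales; the exact
% parameterization below is an assumption made for illustration (with
% a = b = 0.5 and phi = 1 it produces horseshoe-like shrinkage: many nearly
% zero draws plus occasional very large ones).
import numpy as np

rng = np.random.default_rng(5)
p, a, b, phi = 10000, 0.5, 0.5, 1.0
lam = rng.gamma(shape=b, scale=1.0 / phi, size=p)   # lambda_j ~ Ga(b, phi)
psi = rng.gamma(shape=a, scale=1.0 / lam)           # psi_j ~ Ga(a, lambda_j)
beta = rng.normal(scale=np.sqrt(psi))               # beta_j ~ N(0, psi_j)
print(np.mean(np.abs(beta) < 1e-2), np.abs(beta).max())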

@article{fds257992,
   Author = {Ren, L and Du, L and Carin, L and Dunson, DB},
   Title = {Logistic Stick-Breaking Process.},
   Journal = {Journal of Machine Learning Research},
   Volume = {12},
   Number = {Jan},
   Pages = {203-239},
   Year = {2011},
   Month = {January},
   ISSN = {1532-4435},
   Abstract = {A logistic stick-breaking process (LSBP) is proposed for
             non-parametric clustering of general spatially- or
             temporally-dependent data, imposing the belief that
             proximate data are more likely to be clustered together. The
             sticks in the LSBP are realized via multiple logistic
             regression functions, with shrinkage priors employed to
             favor contiguous and spatially localized segments. The LSBP
             is also extended for the simultaneous processing of multiple
             data sets, yielding a hierarchical logistic stick-breaking
             process (H-LSBP). The model parameters (atoms) within the
             H-LSBP are shared across the multiple learning tasks.
             Efficient variational Bayesian inference is derived, and
             comparisons are made to related techniques in the
             literature. Experimental analysis is performed for audio
             waveforms and images, and it is demonstrated that for
             segmentation applications the LSBP yields generally
             homogeneous segments with sharp boundaries.},
   Key = {fds257992}
}

@article{fds257995,
   Author = {Chen, H and Dunson, DB and Carin, L},
   Title = {Topic Modeling with Nonparametric Markov
             Tree.},
   Journal = {Proceedings of the ... International Conference on Machine
             Learning. International Conference on Machine
             Learning},
   Volume = {2011},
   Pages = {377-384},
   Year = {2011},
   Month = {January},
   Abstract = {A new hierarchical tree-based topic model is developed,
             based on nonparametric Bayesian techniques. The model has
             two unique attributes: (i) a child node in the tree may have
             more than one parent, with the goal of eliminating redundant
             sub-topics deep in the tree; and (ii) parsimonious
             sub-topics are manifested, by removing redundant usage of
             words at multiple scales. The depth and width of the tree
             are unbounded within the prior, with a retrospective sampler
             employed to adaptively infer the appropriate tree size based
             upon the corpus under study. Excellent quantitative results
             are manifested on five standard data sets, and the inferred
             tree structure is also found to be highly
             interpretable.},
   Key = {fds257995}
}

@article{fds257996,
   Author = {Zhang, X and Dunson, DB and Carin, L},
   Title = {Tree-Structured Infinite Sparse Factor Model.},
   Journal = {Proceedings of the ... International Conference on Machine
             Learning. International Conference on Machine
             Learning},
   Volume = {2011},
   Pages = {785-792},
   Year = {2011},
   Month = {January},
   Abstract = {A tree-structured multiplicative gamma process (TMGP) is
             developed, for inferring the depth of a tree-based
             factor-analysis model. This new model is coupled with the
             nested Chinese restaurant process, to nonparametrically
             infer the depth and width (structure) of the tree. In
             addition to developing the model, theoretical properties of
             the TMGP are addressed, and a novel MCMC sampler is
             developed. The structure of the inferred tree is used to
             learn relationships between high-dimensional data, and the
             model is also applied to compressive sensing and
             interpolation of incomplete images.},
   Key = {fds257996}
}

@article{fds258002,
   Author = {Chen, M and Zaas, A and Woods, C and Ginsburg, GS and Lucas, J and Dunson,
             D and Carin, L},
   Title = {Predicting Viral Infection From High-Dimensional Biomarker
             Trajectories.},
   Journal = {Journal of the American Statistical Association},
   Volume = {106},
   Number = {496},
   Pages = {1259-1279},
   Year = {2011},
   Month = {January},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/jasa.2011.ap10611},
   Abstract = {There is often interest in predicting an individual's latent
             health status based on high-dimensional biomarkers that vary
             over time. Motivated by time-course gene expression array
             data that we have collected in two influenza challenge
             studies performed with healthy human volunteers, we develop
             a novel time-aligned Bayesian dynamic factor analysis
             methodology. The time course trajectories in the gene
             expressions are related to a relatively low-dimensional
             vector of latent factors, which vary dynamically starting at
             the latent initiation time of infection. Using a
             nonparametric cure rate model for the latent initiation
             times, we allow selection of the genes in the viral response
             pathway, variability among individuals in infection times,
             and a subset of individuals who are not infected. As we
             demonstrate using held-out data, this statistical framework
             allows accurate predictions of infected individuals in
             advance of the development of clinical symptoms, without
             labeled data and even when the number of biomarkers vastly
             exceeds the number of individuals under study. Biological
             interpretation of several of the inferred pathways (factors)
             is provided.},
   Doi = {10.1198/jasa.2011.ap10611},
   Key = {fds258002}
}

@article{fds258040,
   Author = {Wang, L and Dunson, DB},
   Title = {Fast Bayesian Inference in Dirichlet Process Mixture
             Models.},
   Journal = {Journal of Computational and Graphical Statistics : a Joint
             Publication of American Statistical Association, Institute
             of Mathematical Statistics, Interface Foundation of North
             America},
   Volume = {20},
   Number = {1},
   Pages = {196-216},
   Publisher = {Informa UK Limited},
   Year = {2011},
   Month = {January},
   ISSN = {1061-8600},
   url = {http://dx.doi.org/10.1198/jcgs.2010.07081},
   Abstract = {There has been increasing interest in applying Bayesian
             nonparametric methods in large samples and high dimensions.
             As Markov chain Monte Carlo (MCMC) algorithms are often
             infeasible, there is a pressing need for much faster
             algorithms. This article proposes a fast approach for
             inference in Dirichlet process mixture (DPM) models. Viewing
             the partitioning of subjects into clusters as a model
             selection problem, we propose a sequential greedy search
             algorithm for selecting the partition. Then, when conjugate
             priors are chosen, the resulting posterior conditionally on
             the selected partition is available in closed form. This
             approach allows testing of parametric models versus
             nonparametric alternatives based on Bayes factors. We
             evaluate the approach using simulation studies and compare
             it with four other fast nonparametric methods in the
             literature. We apply the proposed approach to three datasets
             including one from a large epidemiologic study. Matlab codes
             for the simulation and data analyses using the proposed
             approach are available online in the supplemental
             materials.},
   Doi = {10.1198/jcgs.2010.07081},
   Key = {fds258040}
}
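
% As a minimal sketch of a sequential greedy assignment in the spirit of the
% approach described above (not the authors' exact algorithm): each
% observation joins the existing cluster, or opens a new one, that maximizes
% the CRP weight times the conjugate Gaussian predictive density; the known
% variance, the N(0, tau2) prior on cluster means, and all constants are
% assumptions.
import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(6)
y = np.concatenate([rng.normal(-2, 1, 100), rng.normal(3, 1, 100)])
rng.shuffle(y)
alpha, sigma2, tau2 = 1.0, 1.0, 10.0
counts, sums = [], []
for yi in y:
    scores = []
    for nk, sk in zip(counts, sums):     # predictive for each existing cluster
        post_var = 1.0 / (1.0 / tau2 + nk / sigma2)
        post_mean = post_var * sk / sigma2
        scores.append(nk * norm.pdf(yi, post_mean, np.sqrt(sigma2 + post_var)))
    scores.append(alpha * norm.pdf(yi, 0.0, np.sqrt(sigma2 + tau2)))  # new cluster
    k = int(np.argmax(scores))
    if k == len(counts):
        counts.append(0)
        sums.append(0.0)
    counts[k] += 1
    sums[k] += yi
print(len(counts), "clusters found")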

@article{fds258030,
   Author = {Shi, M and Dunson, DB},
   Title = {Bayesian Variable Selection via Particle Stochastic
             Search.},
   Journal = {Statistics & Probability Letters},
   Volume = {81},
   Number = {2},
   Pages = {283-291},
   Year = {2011},
   Month = {February},
   ISSN = {0167-7152},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/21278860},
   Abstract = {We focus on Bayesian variable selection in regression
             models. One challenge is to search the huge model space
             adequately, while identifying high posterior probability
             regions. In the past decades, the main focus has been on the
             use of Markov chain Monte Carlo (MCMC) algorithms for these
             purposes. In this article, we propose a new computational
             approach based on sequential Monte Carlo (SMC), which we
             refer to as particle stochastic search (PSS). We illustrate
             PSS through applications to linear regression and probit
             models.},
   Doi = {10.1016/j.spl.2010.10.011},
   Key = {fds258030}
}

@article{fds258055,
   Author = {Chung, Y and Dunson, DB},
   Title = {The local Dirichlet process.},
   Journal = {Annals of the Institute of Statistical Mathematics},
   Volume = {63},
   Number = {1},
   Pages = {59-80},
   Year = {2011},
   Month = {February},
   ISSN = {0020-3157},
   url = {http://dx.doi.org/10.1007/s10463-008-0218-9},
   Abstract = {As a generalization of the Dirichlet process (DP) to allow
             predictor dependence, we propose a local Dirichlet process
             (lDP). The lDP provides a prior distribution for a
             collection of random probability measures indexed by
             predictors. This is accomplished by assigning stick-breaking
             weights and atoms to random locations in a predictor space.
             The probability measure at a given predictor value is then
             formulated using the weights and atoms located in a
             neighborhood about that predictor value. This construction
             results in a marginal DP prior for the random measure at any
             specific predictor value. Dependence is induced through
             local sharing of random components. Theoretical properties
             are considered and a blocked Gibbs sampler is proposed for
             posterior computation in lDP mixture models. The methods are
             illustrated using simulated examples and an epidemiologic
             application.},
   Doi = {10.1007/s10463-008-0218-9},
   Key = {fds258055}
}

@article{fds257960,
   Author = {Rodríguez, A and Dunson, DB},
   Title = {Nonparametric Bayesian models through probit stick-breaking
             processes.},
   Journal = {Bayesian Analysis},
   Volume = {6},
   Number = {1},
   Pages = {145-178},
   Year = {2011},
   Month = {March},
   ISSN = {1936-0975},
   url = {http://dx.doi.org/10.1214/11-BA605},
   Abstract = {We describe a novel class of Bayesian nonparametric priors
             based on stick-breaking constructions where the weights of
             the process are constructed as probit transformations of
             normal random variables. We show that these priors are
             extremely flexible, allowing us to generate a great variety
             of models while preserving computational simplicity.
             Particular emphasis is placed on the construction of rich
             temporal and spatial processes, which are applied to two
             problems in finance and ecology.},
   Doi = {10.1214/11-BA605},
   Key = {fds257960}
}

@article{fds257964,
   Author = {Pati, D and Reich, BJ and Dunson, DB},
   Title = {Bayesian geostatistical modelling with informative sampling
             locations.},
   Journal = {Biometrika},
   Volume = {98},
   Number = {1},
   Pages = {35-48},
   Year = {2011},
   Month = {March},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asq067},
   Abstract = {We consider geostatistical models that allow the locations
             at which data are collected to be informative about the
             outcomes. A Bayesian approach is proposed, which models the
             locations using a log Gaussian Cox process, while modelling
             the outcomes conditionally on the locations as Gaussian with
             a Gaussian process spatial random effect and adjustment for
             the location intensity process. We prove posterior propriety
             under an improper prior on the parameter controlling the
             degree of informative sampling, demonstrating that the data
             are informative. In addition, we show that the density of
             the locations and mean function of the outcome process can
             be estimated consistently under mild assumptions. The
             methods show significant evidence of informative sampling
             when applied to ozone data over Eastern U.S.A.},
   Doi = {10.1093/biomet/asq067},
   Key = {fds257964}
}

@article{fds258000,
   Author = {Chen, M and Silva, J and Paisley, J and Wang, C and Dunson, D and Carin,
             L},
   Title = {Erratum: Compressive sensing on manifolds using a
             nonparametric mixture of factor analyzers: Algorithm and
             performance bounds (IEEE Transactions on Signal Processing
             (2010) 58:12 (6140-6155))},
   Journal = {Ieee Transactions on Signal Processing},
   Volume = {59},
   Number = {3},
   Pages = {1329},
   Publisher = {Institute of Electrical and Electronics Engineers
             (IEEE)},
   Year = {2011},
   Month = {March},
   ISSN = {1053-587X},
   url = {http://dx.doi.org/10.1109/TSP.2011.2107810},
   Doi = {10.1109/TSP.2011.2107810},
   Key = {fds258000}
}

@article{fds258001,
   Author = {Carin, L and Baraniuk, RG and Cevher, V and Dunson, D and Jordan, MI and Sapiro, G and Wakin, MB},
   Title = {Learning Low-Dimensional Signal Models: A Bayesian approach
             based on incomplete measurements.},
   Journal = {Ieee Signal Processing Magazine},
   Volume = {28},
   Number = {2},
   Pages = {39-51},
   Year = {2011},
   Month = {March},
   ISSN = {1053-5888},
   url = {http://dx.doi.org/10.1109/MSP.2010.939733},
   Abstract = {Sampling, coding, and streaming even the most essential
             data, e.g., in medical imaging and weather-monitoring
             applications, produce a data deluge that severely stresses
             the available analog-to-digital converter, communication
             bandwidth, and digital-storage resources. Surprisingly,
             while the ambient data dimension is large in many problems,
             the relevant information in the data can reside in a much
             lower dimensional space. © 2006 IEEE.},
   Doi = {10.1109/MSP.2010.939733},
   Key = {fds258001}
}

@article{fds258038,
   Author = {Reich, BJ and Fuentes, M and Dunson, DB},
   Title = {Bayesian Spatial Quantile Regression.},
   Journal = {Journal of the American Statistical Association},
   Volume = {106},
   Number = {493},
   Pages = {6-20},
   Year = {2011},
   Month = {March},
   ISSN = {0162-1459},
   url = {http://hdl.handle.net/10161/2981},
   Abstract = {Tropospheric ozone is one of the six criteria pollutants
             regulated by the United States Environmental Protection
             Agency under the Clean Air Act and has been linked with
             several adverse health effects, including mortality. Due to
             the strong dependence on weather conditions, ozone may be
             sensitive to climate change and there is great interest in
             studying the potential effect of climate change on ozone,
             and how this change may affect public health. In this paper
             we develop a Bayesian spatial model to predict ozone under
             different meteorological conditions, and use this model to
             study spatial and temporal trends and to forecast ozone
             concentrations under different climate scenarios. We develop
             a spatial quantile regression model that does not assume
             normality and allows the covariates to affect the entire
             conditional distribution, rather than just the mean. The
             conditional distribution is allowed to vary from
             site-to-site and is smoothed with a spatial prior. For
             extremely large datasets our model is computationally
             infeasible, and we develop an approximate method. We apply
             the approximate version of our model to summer ozone from
             1997-2005 in the Eastern U.S., and use deterministic climate
             models to project ozone under future climate conditions. Our
             analysis suggests that holding all other factors fixed, an
             increase in daily average temperature will lead to the
             largest increase in ozone in the Industrial Midwest and
             Northeast.},
   Doi = {10.1198/jasa.2010.ap09237},
   Key = {fds258038}
}

@article{fds257965,
   Author = {Dzirasa, K and McGarity, DL and Bhattacharya, A and Kumar, S and Takahashi, JS and Dunson, D and McClung, CA and Nicolelis,
             MAL},
   Title = {Impaired limbic gamma oscillatory synchrony during
             anxiety-related behavior in a genetic mouse model of bipolar
             mania.},
   Journal = {Journal of Neuroscience},
   Volume = {31},
   Number = {17},
   Pages = {6449-6456},
   Year = {2011},
   Month = {April},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/21525286},
   Abstract = {Alterations in anxiety-related processing are observed
             across many neuropsychiatric disorders, including bipolar
             disorder. Though polymorphisms in a number of circadian
             genes confer risk for this disorder, little is known about
             how changes in circadian gene function disrupt brain
             circuits critical for anxiety-related processing. Here we
             characterize neurophysiological activity simultaneously
             across five limbic brain areas (nucleus accumbens, amygdala,
             prelimbic cortex, ventral hippocampus, and ventral tegmental
             area) as wild-type (WT) mice and mice with a mutation in the
             circadian gene, CLOCK (Clock-Δ19 mice) perform an elevated
             zero maze task. In WT mice, basal limbic gamma oscillatory
             synchrony observed before task performance predicted future
             anxiety-related behaviors. Additionally, dynamic changes in
             limbic gamma oscillatory synchrony were observed based on
             the position of WT mice in the zero maze. Clock-Δ19 mice,
             which displayed an increased propensity to enter the open
             section of the elevated maze, showed profound deficits in
             these anxiety-related circuit processes. Thus, our findings
             link the anxiety-related behavioral deficits observed in
             Clock-Δ19 mice with dysfunctional gamma oscillatory tuning
             across limbic circuits and suggest that alterations in
             limbic oscillatory circuit function induced by circadian
             gene polymorphisms may contribute to the behavioral
             manifestations seen in bipolar mania.},
   Doi = {10.1523/JNEUROSCI.6144-10.2011},
   Key = {fds257965}
}

@article{fds257967,
   Author = {Page, GL and Dunson, DB},
   Title = {Bayesian Local Contamination Models for Multivariate
             Outliers.},
   Journal = {Technometrics},
   Volume = {53},
   Number = {2},
   Pages = {152-162},
   Year = {2011},
   Month = {May},
   ISSN = {0040-1706},
   url = {http://dx.doi.org/10.1198/TECH.2011.10041},
   Abstract = {In studies where data are generated from multiple locations
             or sources it is common for there to exist observations that
             are quite unlike the majority. Motivated by the application
             of establishing a reference value in an inter-laboratory
             setting when outlying labs are present, we propose a local
             contamination model that is able to accommodate unusual
             multivariate realizations in a flexible way. The proposed
             method models the process level of a hierarchical model
             using a mixture with a parametric component and a possibly
             nonparametric contamination. Much of the flexibility in the
             methodology is achieved by allowing varying random subsets
             of the elements in the lab-specific mean vectors to be
             allocated to the contamination component. Computational
             methods are developed and the methodology is compared to
             three other possible approaches using a simulation study. We
             apply the proposed method to a NIST/NOAA sponsored
             inter-laboratory study which motivated the methodological
             development.},
   Doi = {10.1198/TECH.2011.10041},
   Key = {fds257967}
}

@article{fds258033,
   Author = {Crandell, JL and Dunson, DB},
   Title = {Posterior simulation across nonparametric models for
             functional clustering},
   Journal = {Sankhya B},
   Volume = {73},
   Number = {1},
   Pages = {42-61},
   Publisher = {Springer Nature},
   Year = {2011},
   Month = {May},
   ISSN = {0972-7671},
   url = {http://dx.doi.org/10.1007/s13571-011-0014-z},
   Abstract = {© 2011, Indian Statistical Institute. By choosing a species
             sampling random probability measure for the distribution of
             the basis coefficients, a general class of nonparametric
             Bayesian methods for clustering of functional data is
             developed. Allowing the basis functions to be unknown, one
             faces the problem of posterior simulation over a
             high-dimensional space of semiparametric models. To address
             this problem, we propose a novel Metropolis-Hastings
             algorithm for moving between models, with a nested
             generalized collapsed Gibbs sampler for updating the model
             parameters. Focusing on Dirichlet process priors for the
             distribution of the basis coefficients in multivariate
             linear spline models, we apply the approach to the problem
             of clustering of hormone trajectories. This approach allows
             the number of clusters and the shape of the trajectories
             within each cluster to be unknown. The methodology can be
             applied broadly to allow uncertainty in variable selection
             in semiparametric Bayes hierarchical models.},
   Doi = {10.1007/s13571-011-0014-z},
   Key = {fds258033}
}

@article{fds257966,
   Author = {Bhattacharya, A and Dunson, DB},
   Title = {Sparse Bayesian infinite factor models.},
   Journal = {Biometrika},
   Volume = {98},
   Number = {2},
   Pages = {291-306},
   Year = {2011},
   Month = {June},
   ISSN = {0006-3444},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23049129},
   Abstract = {We focus on sparse modelling of high-dimensional covariance
             matrices using Bayesian latent factor models. We propose a
             multiplicative gamma process shrinkage prior on the factor
             loadings which allows introduction of infinitely many
             factors, with the loadings increasingly shrunk towards zero
             as the column index increases. We use our prior on a
             parameter-expanded loading matrix to avoid the order
             dependence typical in factor analysis models and develop an
             efficient Gibbs sampler that scales well as data
             dimensionality increases. The gain in efficiency is achieved
             by the joint conjugacy property of the proposed prior, which
             allows block updating of the loadings matrix. We propose an
             adaptive Gibbs sampler for automatically truncating the
             infinite loading matrix through selection of the number of
             important factors. Theoretical results are provided on the
             support of the prior and truncation approximation bounds. A
             fast algorithm is proposed to produce approximate Bayes
             estimates. Latent factor regression methods are developed
             for prediction and variable selection in applications with
             high-dimensional correlated predictors. Operating
             characteristics are assessed through simulation studies, and
             the approach is applied to predict survival times from gene
             expression data.},
   Doi = {10.1093/biomet/asr013},
   Key = {fds257966}
}
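
The multiplicative gamma process shrinkage prior in this abstract can be simulated directly: column-wise precisions are cumulative products of gamma variables, so the loadings shrink toward zero as the column index grows. The sketch below draws a truncated loadings matrix and the induced covariance; the hyperparameter values and the truncation level are assumptions for illustration, not the paper's settings.

import numpy as np

rng = np.random.default_rng(1)
p, H = 30, 15               # variables, truncation level for the "infinite" factors
a1, a2, nu = 2.0, 3.0, 3.0  # a2 > 1 makes precisions grow with column index

delta = np.concatenate([rng.gamma(a1, 1.0, 1), rng.gamma(a2, 1.0, H - 1)])
tau = np.cumprod(delta)                    # column-wise precision multipliers
phi = rng.gamma(nu / 2, 2 / nu, (p, H))    # local precisions, E[phi] = 1
Lambda = rng.normal(0.0, 1.0, (p, H)) / np.sqrt(phi * tau)  # shrunk loadings

Sigma = np.diag(1 / rng.gamma(1.0, 1.0, p))   # residual variances
Cov = Lambda @ Lambda.T + Sigma               # induced covariance matrix
print("mean |loading| by column:", np.abs(Lambda).mean(axis=0).round(3))

The printed column averages decay with the column index, which is the shrinkage behavior the prior is designed to produce.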

@article{fds258035,
   Author = {Liu, F and Dunson, D and Zou, F},
   Title = {High-dimensional variable selection in meta-analysis for
             censored data.},
   Journal = {Biometrics},
   Volume = {67},
   Number = {2},
   Pages = {504-512},
   Year = {2011},
   Month = {June},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2010.01466.x},
   Abstract = {This article considers the problem of selecting predictors
             of time to an event from a high-dimensional set of candidate
             predictors using data from multiple studies. As an
             alternative to the current multistage testing approaches, we
             propose to model the study-to-study heterogeneity explicitly
             using a hierarchical model to borrow strength. Our method
             incorporates censored data through an accelerated failure
             time model. Using a carefully formulated prior
             specification, we develop a fast approach to predictor
             selection and shrinkage estimation for high-dimensional
             predictors. For model fitting, we develop a Monte Carlo
             expectation maximization (MC-EM) algorithm to accommodate
             censored data. The proposed approach, which is related to
             the relevance vector machine (RVM), relies on maximum a
             posteriori estimation to rapidly obtain a sparse estimate.
             As for the typical RVM, there is an intrinsic thresholding
             property in which unimportant predictors tend to have their
             coefficients shrunk to zero. We compare our method with some
             commonly used procedures through simulation studies. We also
             illustrate the method using the gene expression barcode data
             from three breast cancer studies.},
   Doi = {10.1111/j.1541-0420.2010.01466.x},
   Key = {fds258035}
}

@article{fds257968,
   Author = {Xing, C and Dunson, DB},
   Title = {Bayesian inference for genomic data integration reduces
             misclassification rate in predicting protein-protein
             interactions.},
   Journal = {PLoS Computational Biology},
   Volume = {7},
   Number = {7},
   Pages = {e1002110},
   Year = {2011},
   Month = {July},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/21829334},
   Abstract = {Protein-protein interactions (PPIs) are essential to most
             fundamental cellular processes. There has been increasing
             interest in reconstructing PPI networks. However, several
             critical difficulties exist in obtaining reliable
             predictions. Notably, false positive rates can exceed 80%.
             Error correction from each generating source can be
             both time-consuming and inefficient due to the difficulty of
             covering the errors from multiple levels of data processing
             procedures within a single test. We propose a novel Bayesian
             integration method, termed nonparametric Bayes ensemble
             learning (NBEL), to lower the misclassification rate (both
             false positives and negatives) through automatically
             up-weighting data sources that are most informative, while
             down-weighting less informative and biased sources.
             Extensive studies indicate that NBEL is significantly more
             robust than the classic naïve Bayes to unreliable,
             error-prone and contaminated data. On a large human data set
             our NBEL approach predicts many more PPIs than naïve Bayes.
             This suggests that previous studies may have large numbers
             of not only false positives but also false negatives.
             Validation on two high-quality human PPI datasets supports
             our observations. Our experiments demonstrate that
             it is feasible to predict high-throughput PPIs
             computationally with substantially reduced false positives
             and false negatives. The ability to predict large numbers
             of PPIs both reliably and automatically may encourage the
             use of computational approaches to correct data errors in
             general, and may speed up high-quality PPI prediction. Such
             reliable prediction may provide a solid platform for other
             studies such as protein function prediction and the roles
             of PPIs in disease susceptibility.},
   Doi = {10.1371/journal.pcbi.1002110},
   Key = {fds257968}
}

@article{fds257969,
   Author = {Armagan, A and Dunson, D},
   Title = {Sparse variational analysis of linear mixed models for large
             data sets},
   Journal = {Statistics & Probability Letters},
   Volume = {81},
   Number = {8},
   Pages = {1056-1062},
   Publisher = {Elsevier BV},
   Year = {2011},
   Month = {August},
   ISSN = {0167-7152},
   url = {http://dx.doi.org/10.1016/j.spl.2011.02.029},
   Abstract = {It is increasingly common to be faced with longitudinal or
             multi-level data sets that have large numbers of predictors
             and/or a large sample size. Current methods of fitting and
             inference for mixed effects models tend to perform poorly in
             such settings. When there are many variables, it is
             appealing to allow uncertainty in subset selection and to
             obtain a sparse characterization of the data. Bayesian
             methods are available to address these goals using Markov
             chain Monte Carlo (MCMC), but MCMC is very computationally
             expensive and can be infeasible in large p and/or large n
             problems. As a fast approximate Bayes solution, we recommend
             a novel approximation to the posterior relying on
             variational methods. Variational methods are used to
             approximate the posterior of the parameters in a
             decomposition of the variance components, with priors chosen
             to obtain a sparse solution that allows selection of random
             effects. The method is evaluated through a simulation study,
             and applied to an epidemiological application. © 2011
             Elsevier B.V.},
   Doi = {10.1016/j.spl.2011.02.029},
   Key = {fds257969}
}

@article{fds257993,
   Author = {Zhou, M and Yang, H and Sapiro, G and Dunson, D and Carin,
             L},
   Title = {Covariate-dependent dictionary learning and sparse
             coding},
   Journal = {2011 IEEE International Conference on Acoustics, Speech
             and Signal Processing (ICASSP)},
   Pages = {5824-5827},
   Publisher = {IEEE},
   Year = {2011},
   Month = {August},
   ISSN = {1520-6149},
   url = {http://dx.doi.org/10.1109/ICASSP.2011.5947685},
   Abstract = {A dependent hierarchical beta process (dHBP) is developed as
             a prior for data that may be represented in terms of a
             sparse set of latent features (dictionary elements), with
             covariate-dependent feature usage. The dHBP is applicable to
             general covariates and data models, imposing that signals
             with similar covariates are likely to be manifested in terms
             of similar features. As an application, we consider the
             simultaneous sparse modeling of multiple images, with the
             covariate of a given image linked to its similarity to all
             other images (as applied in manifold learning). Efficient
             inference is performed using hybrid Gibbs,
             Metropolis-Hastings and slice sampling. © 2011
             IEEE.},
   Doi = {10.1109/ICASSP.2011.5947685},
   Key = {fds257993}
}

@article{fds257962,
   Author = {Yang, H and O'Brien, S and Dunson, DB},
   Title = {Nonparametric Bayes Stochastically Ordered Latent Class
             Models.},
   Journal = {Journal of the American Statistical Association},
   Volume = {106},
   Number = {495},
   Pages = {807-817},
   Year = {2011},
   Month = {September},
   ISSN = {0162-1459},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/22505787},
   Abstract = {Latent class models (LCMs) are used increasingly for
             addressing a broad variety of problems, including sparse
             modeling of multivariate and longitudinal data, model-based
             clustering, and flexible inferences on predictor effects.
             Typical frequentist LCMs require estimation of a single
             finite number of classes, which does not increase with the
             sample size, and have a well-known sensitivity to parametric
             assumptions on the distributions within a class. Bayesian
             nonparametric methods have been developed to allow an
             infinite number of classes in the general population, with
             the number represented in a sample increasing with sample
             size. In this article, we propose a new nonparametric Bayes
             model that allows predictors to flexibly impact the
             allocation to latent classes, while limiting sensitivity to
             parametric assumptions by allowing class-specific
             distributions to be unknown subject to a stochastic ordering
             constraint. An efficient MCMC algorithm is developed for
             posterior computation. The methods are validated using
             simulation studies and applied to the problem of ranking
             medical procedures in terms of the distribution of patient
             morbidity.},
   Doi = {10.1198/jasa.2011.ap10058},
   Key = {fds257962}
}

@article{fds257970,
   Author = {Wang, L and Dunson, DB},
   Title = {Semiparametric Bayes' proportional odds models for current
             status data with underreporting.},
   Journal = {Biometrics},
   Volume = {67},
   Number = {3},
   Pages = {1111-1118},
   Year = {2011},
   Month = {September},
   ISSN = {0006-341X},
   url = {http://dx.doi.org/10.1111/j.1541-0420.2010.01532.x},
   Abstract = {Current status data are a type of interval-censored event
             time data in which all the individuals are either left or
             right censored. For example, our motivation is drawn from a
             cross-sectional study, which measured whether or not fibroid
             onset had occurred by the age of an ultrasound exam for each
             woman. We propose a semiparametric Bayesian proportional
             odds model in which the baseline event time distribution is
             estimated nonparametrically by using adaptive monotone
             splines in a logistic regression model and the potential
             risk factors are included in the parametric part of the mean
             structure. The proposed approach has the advantage of being
             straightforward to implement using a simple and efficient
             Gibbs sampler, whereas alternative semiparametric Bayes'
             event time models encounter problems for current status
             data. The model is generalized to allow systematic
             underreporting in a subset of the data, and the methods are
             applied to an epidemiologic study of uterine
             fibroids.},
   Doi = {10.1111/j.1541-0420.2010.01532.x},
   Key = {fds257970}
}

@article{fds257972,
   Author = {Wang, L and Dunson, DB},
   Title = {Bayesian isotonic density regression.},
   Journal = {Biometrika},
   Volume = {98},
   Number = {3},
   Pages = {537-551},
   Year = {2011},
   Month = {September},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asr025},
   Abstract = {Density regression models allow the conditional distribution
             of the response given predictors to change flexibly over the
             predictor space. Such models are much more flexible than
             nonparametric mean regression models with nonparametric
             residual distributions, and are well supported in many
             applications. A rich variety of Bayesian methods have been
             proposed for density regression, but it is not clear whether
             such priors have full support so that any true
             data-generating model can be accurately approximated. This
             article develops a new class of density regression models
             that incorporate stochastic-ordering constraints which are
             natural when a response tends to increase or decrease
             monotonically with a predictor. Theory is developed showing
             large support. Methods are developed for hypothesis testing,
             with posterior computation relying on a simple Gibbs
             sampler. Frequentist properties are illustrated in a
             simulation study, and an epidemiology application is
             considered.},
   Doi = {10.1093/biomet/asr025},
   Key = {fds257972}
}

@article{fds257961,
   Author = {Hannah, LA and Dunson, DB},
   Title = {Approximate dynamic programming for storage
             problems},
   Journal = {Proceedings of the 28th International Conference on Machine
             Learning, ICML 2011},
   Pages = {337-344},
   Year = {2011},
   Month = {October},
   Abstract = {Storage problems are an important subclass of stochastic
             control problems. This paper presents a new method,
             approximate dynamic programming for storage (ADPS), to
             solve storage problems with continuous, convex decision
             sets. Unlike other solution procedures, ADPS allows math
             programming to be used to make decisions in each time
             period, even in the presence of large state variables. We
             test ADPS on the day-ahead wind commitment problem with
             storage.
             Copyright 2011 by the author(s)/owner(s).},
   Key = {fds257961}
}

@article{fds257994,
   Author = {Chen, B and Polatkan, G and Sapiro, G and Dunson, DB and Carin,
             L},
   Title = {The hierarchical beta process for convolutional factor
             analysis and deep learning},
   Journal = {Proceedings of the 28th International Conference on Machine
             Learning, ICML 2011},
   Pages = {361-368},
   Year = {2011},
   Month = {October},
   Abstract = {A convolutional factor-analysis model is developed, with the
             number of filters (factors) inferred via the beta process
             (BP) and hierarchical BP, for single-task and multi-task
             learning, respectively. The computation of the model
             parameters is implemented within a Bayesian setting,
             employing Gibbs sampling; we explicitly exploit the
             convolutional nature of the expansion to accelerate
             computations. The model is used in a multi-level ("deep")
             analysis of general data, with specific results presented
             for image-processing data sets, e.g., classification.
             Copyright 2011 by the author(s)/owner(s).},
   Key = {fds257994}
}

@article{fds257957,
   Author = {Gordon, GJ and Dunson, D},
   Title = {Preface to the proceedings of AISTATS 2011},
   Journal = {Journal of Machine Learning Research},
   Volume = {15},
   Pages = {1-2},
   Year = {2011},
   Month = {December},
   ISSN = {1532-4435},
   Key = {fds257957}
}

@article{fds257971,
   Author = {Canale, A and Dunson, DB},
   Title = {Bayesian Kernel Mixtures for Counts.},
   Journal = {Journal of the American Statistical Association},
   Volume = {106},
   Number = {496},
   Pages = {1528-1539},
   Year = {2011},
   Month = {December},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/jasa.2011.tm10552},
   Abstract = {Although Bayesian nonparametric mixture models for
             continuous data are well developed, there is a limited
             literature on related approaches for count data. A common
             strategy is to use a mixture of Poissons, which
             unfortunately is quite restrictive in not accounting for
             distributions having variance less than the mean. Other
             approaches include mixing multinomials, which requires
             finite support, and using a Dirichlet process prior with a
             Poisson base measure, which does not allow smooth deviations
             from the Poisson. As a broad class of alternative models, we
             propose to use nonparametric mixtures of rounded continuous
             kernels. An efficient Gibbs sampler is developed for
             posterior computation, and a simulation study is performed
             to assess performance. Focusing on the rounded Gaussian
             case, we generalize the modeling framework to account for
             multivariate count data, joint modeling with continuous and
             categorical variables, and other complications. The methods
             are illustrated through applications to a developmental
             toxicity study and marketing data. This article has
             supplementary material online.},
   Doi = {10.1198/jasa.2011.tm10552},
   Key = {fds257971}
}
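
The rounded-kernel construction is easy to illustrate: draw a latent continuous variable from a Gaussian mixture and map it onto the non-negative integers. In the sketch below, a fixed two-component mixture stands in for the paper's nonparametric mixture, and the thresholds (y = j when the latent draw falls in [j-1, j), with negative draws mapped to zero) are one common choice, assumed here for illustration.

import numpy as np

rng = np.random.default_rng(2)

def rounded_gaussian_counts(n, weights, means, sds):
    comp = rng.choice(len(weights), size=n, p=weights)   # mixture component
    z = rng.normal(means[comp], sds[comp])               # latent continuous draw
    return np.where(z < 0, 0, np.floor(z).astype(int) + 1)  # y = j iff z in [j-1, j)

y = rounded_gaussian_counts(10_000, np.array([0.6, 0.4]),
                            np.array([1.0, 8.0]), np.array([0.5, 2.0]))
# Unlike a single Poisson, the induced counts can be under- or over-dispersed.
print("mean:", y.mean().round(2), "variance:", y.var().round(2))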

@article{fds257997,
   Author = {Zhang, XX and Dunson, DB and Carin, L},
   Title = {Hierarchical topic modeling for analysis of time-evolving
             personal choices},
   Journal = {Advances in Neural Information Processing Systems 24: 25th
             Annual Conference on Neural Information Processing Systems
             2011, NIPS 2011},
   Year = {2011},
   Month = {December},
   Abstract = {The nested Chinese restaurant process is extended to design
             a nonparametric topic-model tree for representation of human
             choices. Each tree path corresponds to a type of person, and
             each node (topic) has a corresponding probability vector
             over items that may be selected. The observed data are
             assumed to have associated temporal covariates
             (corresponding to the time at which choices are made), and
             we wish to impose that with increasing time it is more
             probable that topics deeper in the tree are utilized. This
             structure is imposed by developing a new "change point"
             stick-breaking model that is coupled with a Poisson and
             product-of-gammas construction. To share topics across the
             tree nodes, topic distributions are drawn from a Dirichlet
             process. As a demonstration of this concept, we analyze real
             data on course selections of undergraduate students at Duke
             University, with the goal of uncovering and concisely
             representing structure in the curriculum and in the
             characteristics of the student body.},
   Key = {fds257997}
}

@article{fds257998,
   Author = {Ren, L and Wang, Y and Dunson, D and Carin, L},
   Title = {The kernel beta process},
   Journal = {Advances in Neural Information Processing Systems 24: 25th
             Annual Conference on Neural Information Processing Systems
             2011, NIPS 2011},
   Year = {2011},
   Month = {December},
   Abstract = {A new Lévy process prior is proposed for an uncountable
             collection of covariate-dependent feature-learning measures;
             the model is called the kernel beta process (KBP). Available
             covariates are handled efficiently via the kernel
             construction, with covariates assumed observed with each
             data sample ("customer"), and latent covariates learned for
             each feature ("dish"). Each customer selects dishes from an
             infinite buffet, in a manner analogous to the beta process,
             with the added constraint that a customer first decides
             probabilistically whether to "consider" a dish, based on the
             distance in covariate space between the customer and dish.
             If a customer does consider a particular dish, that dish is
             then selected probabilistically as in the beta process. The
             beta process is recovered as a limiting case of the KBP. An
             efficient Gibbs sampler is developed for computations, and
             state-of-the-art results are presented for image processing
             and music analysis tasks.},
   Key = {fds257998}
}
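
The buffet metaphor in this abstract translates into a short generative sketch: a finite approximation to the beta process supplies dish (feature) probabilities, and each customer only "considers" dishes whose latent covariates are close to its own. The Beta(alpha/K, 1) finite approximation, the Gaussian kernel, and all parameter values below are illustrative assumptions, not the paper's exact construction.

import numpy as np

rng = np.random.default_rng(9)
n, K, alpha, h = 50, 40, 5.0, 0.2

x = rng.uniform(0, 1, n)               # observed covariate per customer
x_dish = rng.uniform(0, 1, K)          # latent covariate per dish (feature)
pi = rng.beta(alpha / K, 1.0, K)       # crude finite beta-process approximation

dist2 = (x[:, None] - x_dish[None, :])**2
consider = rng.random((n, K)) < np.exp(-dist2 / h**2)   # kernel-gated consideration
select = rng.random((n, K)) < pi[None, :]               # beta-process-style selection
Z = consider & select                  # binary feature-usage matrix

# Customers with similar covariates tend to share features.
print("avg dishes per customer:", Z.sum(1).mean().round(2))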

@article{fds257999,
   Author = {Zhou, M and Carin, L and Yang, H and Dunson, D and Sapiro,
             G},
   Title = {Dependent hierarchical beta process for image interpolation
             and denoising},
   Journal = {Journal of Machine Learning Research},
   Volume = {15},
   Pages = {883-891},
   Year = {2011},
   Month = {December},
   ISSN = {1532-4435},
   Abstract = {A dependent hierarchical beta process (dHBP) is developed as
             a prior for data that may be represented in terms of a
             sparse set of latent features, with covariate-dependent
             feature usage. The dHBP is applicable to general covariates
             and data models, imposing that signals with similar
             covariates are likely to be manifested in terms of similar
             features. Coupling the dHBP with the Bernoulli process, and
             upon marginalizing out the dHBP, the model may be
             interpreted as a covariate-dependent hierarchical Indian
             buffet process. As applications, we consider interpolation
             and denoising of an image, with covariates defined by the
             location of image patches within an image. Two types of
             noise models are considered: (i) typical white Gaussian
             noise; and (ii) spiky noise of arbitrary amplitude,
             distributed uniformly at random. In these examples, the
             features correspond to the atoms of a dictionary, learned
             based upon the data under test (without a priori training
             data). State-of-the-art performance is demonstrated, with
             efficient inference using hybrid Gibbs, Metropolis-Hastings
             and slice sampling. Copyright 2011 by the
             authors.},
   Key = {fds257999}
}

@article{fds258003,
   Author = {Zhou, M and Chen, H and Paisley, J and Ren, L and Li, L and Xing, Z and Dunson, D and Sapiro, G and Carin, L},
   Title = {Nonparametric Bayesian dictionary learning for analysis of
             noisy and incomplete images.},
   Journal = {IEEE Transactions on Image Processing: A Publication of
             the IEEE Signal Processing Society},
   Volume = {21},
   Number = {1},
   Pages = {130-144},
   Year = {2012},
   Month = {January},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/21693421},
   Abstract = {Nonparametric Bayesian methods are considered for recovery
             of imagery based upon compressive, incomplete, and/or noisy
             measurements. A truncated beta-Bernoulli process is employed
             to infer an appropriate dictionary for the data under test
             and also for image recovery. In the context of compressive
             sensing, significant improvements in image recovery are
             manifested using learned dictionaries, relative to using
             standard orthonormal image expansions. The
             compressive-measurement projections are also optimized for
             the learned dictionary. Additionally, we consider simpler
             (incomplete) measurements, defined by measuring a subset of
             image pixels, uniformly selected at random. Spatial
             interrelationships within imagery are exploited through use
             of the Dirichlet and probit stick-breaking processes.
             Several example results are presented, with comparisons to
             other methods in the literature.},
   Doi = {10.1109/TIP.2011.2160072},
   Key = {fds258003}
}

@article{fds258045,
   Author = {Dunson, DB and Xing, C},
   Title = {Nonparametric Bayes Modeling of Multivariate Categorical
             Data.},
   Journal = {Journal of the American Statistical Association},
   Volume = {104},
   Number = {487},
   Pages = {1042-1051},
   Year = {2012},
   Month = {January},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1198/jasa.2009.tm08439},
   Abstract = {Modeling of multivariate unordered categorical (nominal)
             data is a challenging problem, particularly in high
             dimensions and cases in which one wishes to avoid strong
             assumptions about the dependence structure. Commonly used
             approaches rely on the incorporation of latent Gaussian
             random variables or parametric latent class models. The goal
             of this article is to develop a nonparametric Bayes
             approach, which defines a prior with full support on the
             space of distributions for multiple unordered categorical
             variables. This support condition ensures that we are not
             restricting the dependence structure a priori. We show this
             can be accomplished through a Dirichlet process mixture of
             product multinomial distributions, which is also a
             convenient form for posterior computation. Methods for
             nonparametric testing of violations of independence are
             proposed, and the methods are applied to model positional
             dependence within transcription factor binding
             motifs.},
   Doi = {10.1198/jasa.2009.tm08439},
   Key = {fds258045}
}
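
A truncated stick-breaking version of the Dirichlet process mixture of product multinomials in this abstract can be simulated in a few lines: given a latent class, the categorical variables are independent, but marginalizing over classes induces a flexible dependence structure. The truncation level, concentration, and Dirichlet hyperparameters below are illustrative assumptions.

import numpy as np

rng = np.random.default_rng(3)
p, d, K, alpha = 5, 4, 20, 1.0   # variables, categories, truncation, DP concentration

v = rng.beta(1.0, alpha, K)                               # stick-breaking fractions
w = v * np.cumprod(np.concatenate([[1.0], 1 - v[:-1]]))   # class weights
psi = rng.dirichlet(np.ones(d), size=(K, p))              # class- and variable-specific probabilities

def draw(n):
    z = rng.choice(K, size=n, p=w / w.sum())  # latent class (renormalized truncation)
    # Given the class, the p variables are conditionally independent multinomials,
    # yet marginally the mixture induces dependence among them.
    return np.array([[rng.choice(d, p=psi[zi, j]) for j in range(p)] for zi in z])

x = draw(1000)
print("empirical marginal of variable 0:", np.bincount(x[:, 0], minlength=d) / 1000)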

@article{fds323139,
   Author = {Carin, L and Hero, A and Lucas, J and Dunson, D and Chen, M and Heñao, R and Tibau-Puig, A and Zaas, A and Woods, CW and Ginsburg,
             GS},
   Title = {High-Dimensional Longitudinal Genomic Data: An analysis used
             for monitoring viral infections.},
   Journal = {IEEE Signal Processing Magazine},
   Volume = {29},
   Number = {1},
   Pages = {108-123},
   Year = {2012},
   Month = {January},
   url = {http://dx.doi.org/10.1109/MSP.2011.943009},
   Doi = {10.1109/MSP.2011.943009},
   Key = {fds323139}
}

@article{fds322562,
   Author = {Fyshe, A and Fox, E and Dunson, D and Mitchell, T},
   Title = {Hierarchical latent dictionaries for models of brain
             activation},
   Journal = {Journal of Machine Learning Research},
   Volume = {22},
   Pages = {409-421},
   Year = {2012},
   Month = {January},
   Abstract = {In this work, we propose a hierarchical latent dictionary
             approach to estimate the time-varying mean and covariance of
             a process for which we have only limited noisy samples. We
             fully leverage the limited sample size and redundancy in
             sensor measurements by transferring knowledge through a
             hierarchy of lower dimensional latent processes. As a case
             study, we utilize magnetoencephalography (MEG) recordings of
             brain activity to identify the word being viewed by a human
             subject. Specifically, we identify the word category for a
             single noisy MEG recording, given only limited noisy
             samples on which to train.},
   Key = {fds322562}
}

@article{fds323268,
   Author = {Zhou, M and Hannah, LA and Dunson, DB and Carin, L},
   Title = {Beta-negative binomial process and poisson factor
             analysis},
   Journal = {Journal of Machine Learning Research},
   Volume = {22},
   Pages = {1462-1471},
   Year = {2012},
   Month = {January},
   Abstract = {© Copyright 2012 by the authors. A beta-negative binomial
             (BNB) process is proposed, leading to a beta-gamma-Poisson
             process, which may be viewed as a "multiscoop"
             generalization of the beta-Bernoulli process. The BNB
             process is augmented into a beta-gamma-gamma-Poisson
             hierarchical structure, and applied as a nonparametric
             Bayesian prior for an infinite Poisson factor analysis
             model. A finite approximation for the beta process Lévy
             random measure is constructed for convenient implementation.
             Efficient MCMC computations are performed with data
             augmentation and marginalization techniques. Encouraging
             results are shown on document count matrix
             factorization.},
   Key = {fds323268}
}

@article{fds257973,
   Author = {Bhattacharya, A and Dunson, DB},
   Title = {Simplex Factor Models for Multivariate Unordered Categorical
             Data.},
   Journal = {Journal of the American Statistical Association},
   Volume = {107},
   Number = {497},
   Pages = {362-377},
   Year = {2012},
   Month = {March},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2011.646934},
   Abstract = {Gaussian latent factor models are routinely used for
             modeling of dependence in continuous, binary, and ordered
             categorical data. For unordered categorical variables,
             Gaussian latent factor models lead to challenging
             computation and complex modeling structures. As an
             alternative, we propose a novel class of simplex factor
             models. In the single-factor case, the model treats the
             different categorical outcomes as independent with unknown
             marginals. The model can characterize flexible dependence
             structures parsimoniously with few factors, and as factors
             are added, any multivariate categorical data distribution
             can be accurately approximated. Using a Bayesian approach
             for computation and inferences, a Markov chain Monte Carlo
             (MCMC) algorithm is proposed that scales well with
             increasing dimension, with the number of factors treated as
             unknown. We develop an efficient proposal for updating the
             base probability vector in hierarchical Dirichlet models.
             Theoretical properties are described, and we evaluate the
             approach through simulation examples. Applications are
             described for modeling dependence in nucleotide sequences
             and prediction from high-dimensional categorical
             features.},
   Doi = {10.1080/01621459.2011.646934},
   Key = {fds257973}
}

@article{fds257975,
   Author = {Bhattacharya, A and Dunson, DB},
   Title = {Strong consistency of nonparametric Bayes density estimation
             on compact metric spaces with applications to specific
             manifolds.},
   Journal = {Annals of the Institute of Statistical Mathematics},
   Volume = {64},
   Number = {4},
   Pages = {687-714},
   Year = {2012},
   Month = {August},
   ISSN = {0020-3157},
   url = {http://dx.doi.org/10.1007/s10463-011-0341-x},
   Abstract = {This article considers a broad class of kernel mixture
             density models on compact metric spaces and manifolds.
             Following a Bayesian approach with a nonparametric prior on
             the location mixing distribution, sufficient conditions are
             obtained on the kernel, prior and the underlying space for
             strong posterior consistency at any continuous density. The
             prior is also allowed to depend on the sample size n and
             sufficient conditions are obtained for weak and strong
             consistency. These conditions are verified on compact
             Euclidean spaces using multivariate Gaussian kernels, on the
             hypersphere using a von Mises-Fisher kernel and on the
             planar shape space using complex Watson kernels.},
   Doi = {10.1007/s10463-011-0341-x},
   Key = {fds257975}
}

@article{fds257974,
   Author = {Bhattacharya, A and Dunson, D},
   Title = {Nonparametric Bayes classification and hypothesis testing on
             manifolds},
   Journal = {Journal of Multivariate Analysis},
   Volume = {111},
   Pages = {1-19},
   Publisher = {Elsevier BV},
   Year = {2012},
   Month = {October},
   ISSN = {0047-259X},
   url = {http://dx.doi.org/10.1016/j.jmva.2012.02.020},
   Abstract = {Our first focus is prediction of a categorical response
             variable using features that lie on a general manifold. For
             example, the manifold may correspond to the surface of a
             hypersphere. We propose a general kernel mixture model for
             the joint distribution of the response and predictors, with
             the kernel expressed in product form and dependence induced
             through the unknown mixing measure. We provide simple
             sufficient conditions for large support and weak and strong
             posterior consistency in estimating both the joint
             distribution of the response and predictors and the
             conditional distribution of the response. Focusing on a
             Dirichlet process prior for the mixing measure, these
             conditions hold using von Mises-Fisher kernels when the
             manifold is the unit hypersphere. In this case, Bayesian
             methods are developed for efficient posterior computation
             using slice sampling. Next we develop Bayesian nonparametric
             methods for testing whether there is a difference in
             distributions between groups of observations on the manifold
             having unknown densities. We prove consistency of the Bayes
             factor and develop efficient computational methods for its
             calculation. The proposed classification and testing methods
             are evaluated using simulation examples and applied to
             spherical data applications. © 2012 Elsevier
             Inc.},
   Doi = {10.1016/j.jmva.2012.02.020},
   Key = {fds257974}
}

@article{fds257977,
   Author = {Hua, Z and Dunson, DB and Gilmore, JH and Styner, MA and Zhu,
             H},
   Title = {Semiparametric Bayesian local functional models for
             diffusion tensor tract statistics.},
   Journal = {Neuroimage},
   Volume = {63},
   Number = {1},
   Pages = {460-474},
   Year = {2012},
   Month = {October},
   ISSN = {1053-8119},
   url = {http://dx.doi.org/10.1016/j.neuroimage.2012.06.027},
   Abstract = {We propose a semiparametric Bayesian local functional model
             (BFM) for the analysis of multiple diffusion properties
             (e.g., fractional anisotropy) along white matter fiber
             bundles with a set of covariates of interest, such as age
             and gender. BFM accounts for heterogeneity in the shape of
             the fiber bundle diffusion properties among subjects, while
             allowing the impact of the covariates to vary across
             subjects. A nonparametric Bayesian LPP2 prior facilitates
             global and local borrowings of information among subjects,
             while an infinite factor model flexibly represents
             low-dimensional structure. Local hypothesis testing and
             credible bands are developed to identify fiber segments,
             along which multiple diffusion properties are significantly
             associated with covariates of interest, while controlling
             for multiple comparisons. Moreover, BFM naturally groups
             subjects into more homogeneous clusters. Posterior
             computation proceeds via an efficient Markov chain Monte
             Carlo algorithm. A simulation study is performed to evaluate
             the finite sample performance of BFM. We apply BFM to
             investigate the development of white matter diffusivities
             along the splenium of the corpus callosum tract and the
             right internal capsule tract in a clinical study of
             neurodevelopment in new born infants.},
   Doi = {10.1016/j.neuroimage.2012.06.027},
   Key = {fds257977}
}

@article{fds258025,
   Author = {Hannah, LA and Dunson, DB},
   Title = {Ensemble methods for convex regression with applications to
             geometric programming based circuit design},
   Journal = {Proceedings of the 29th International Conference on Machine
             Learning, ICML 2012},
   Volume = {1},
   Pages = {369-376},
   Year = {2012},
   Month = {October},
   Abstract = {Convex regression is a promising area for bridging
             statistical estimation and deterministic convex
             optimization. New piecewise linear convex regression methods
             (Hannah and Dunson, 2011; Magnani and Boyd, 2009) are fast
             and scalable, but can have instability when used to
             approximate constraints or objective functions for
             optimization. Ensemble methods, like bagging, smearing and
             random partitioning, can alleviate this problem and maintain
             the theoretical properties of the underlying estimator. We
             empirically examine the performance of ensemble methods for
             prediction and optimization, and then apply them to device
             modeling and constraint approximation for geometric
             programming based circuit design. Copyright 2012 by the
             author(s)/owner(s).},
   Key = {fds258025}
}
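
As context for the instability that the ensembles above are meant to tame, the sketch below fits a max-affine (piecewise-linear convex) regression by the usual alternating assign-and-refit heuristic and then bags several bootstrap fits. The number of hyperplanes, the refit scheme, and all settings are illustrative assumptions rather than the paper's algorithm.

import numpy as np

rng = np.random.default_rng(4)

def fit_max_affine(x, y, K=4, iters=20):
    X = np.column_stack([x, np.ones_like(x)])        # affine design: [x, 1]
    coef = rng.normal(size=(K, 2)) * 0.1
    for _ in range(iters):
        assign = np.argmax(X @ coef.T, axis=1)       # which plane is active where
        for k in range(K):
            idx = assign == k
            if idx.sum() >= 2:                       # refit active plane by least squares
                coef[k] = np.linalg.lstsq(X[idx], y[idx], rcond=None)[0]
    return coef

def predict(coef, x):
    X = np.column_stack([x, np.ones_like(x)])
    return np.max(X @ coef.T, axis=1)                # convex by construction

x = rng.uniform(-2, 2, 200)
y = x**2 + rng.normal(0, 0.2, 200)                   # noisy convex target
# Bagging: average several fits on bootstrap resamples to reduce instability.
fits = []
for _ in range(10):
    b = rng.integers(0, 200, 200)
    fits.append(fit_max_affine(x[b], y[b]))
xg = np.linspace(-2, 2, 5)
ens = np.mean([predict(c, xg) for c in fits], axis=0)
print(np.round(ens, 2))   # roughly approximates x^2 on the grid

Averaging convex fits preserves convexity, which is why bagging is a natural stabilizer in this setting.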

@article{fds258026,
   Author = {Shterev, ID and Dunson, DB},
   Title = {Bayesian watermark attacks},
   Journal = {Proceedings of the 29th International Conference on Machine
             Learning, ICML 2012},
   Volume = {1},
   Pages = {695-702},
   Year = {2012},
   Month = {October},
   Abstract = {This paper presents an application of statistical machine
             learning to the field of watermarking. We propose a new
             attack model on additive spread-spectrum watermarking
             systems. The proposed attack is based on Bayesian
             statistics. We consider the scenario in which a watermark
             signal is repeatedly embedded in specific, possibly chosen
             based on a secret message bitstream, segments (signals) of
             the host data. The host signal can represent a patch of
             pixels from an image or a video frame. We propose a
             probabilistic model that infers the embedded message
             bit-stream and watermark signal, directly from the
             watermarked data, without access to the decoder. We develop
             an efficient Markov chain Monte Carlo sampler for updating
             the model parameters from their conjugate full conditional
             posteriors. We also provide a variational Bayesian solution,
             which further increases the convergence speed of the
             algorithm. Experiments with synthetic and real image signals
             demonstrate that the attack model is able to correctly infer
             a large part of the message bitstream and obtain a very
             accurate estimate of the watermark signal. Copyright 2012 by
             the author(s)/owner(s).},
   Key = {fds258026}
}

@article{fds258027,
   Author = {Zhou, M and Li, L and Dunson, D and Carin, L},
   Title = {Lognormal and gamma mixed negative binomial
             regression},
   Journal = {Proceedings of the 29th International Conference on Machine
             Learning, ICML 2012},
   Volume = {2},
   Pages = {1343-1350},
   Year = {2012},
   Month = {October},
   url = {http://hdl.handle.net/10161/8954},
   Abstract = {In regression analysis of counts, a lack of simple and
             efficient algorithms for posterior computation has made
             Bayesian approaches appear unattractive and thus
             underdeveloped. We propose a lognormal and gamma mixed
             negative binomial (NB) regression model for counts, and
             present efficient closed-form Bayesian inference; unlike
             conventional Poisson models, the proposed approach has two
             free parameters to include two different kinds of random
             effects, and allows the incorporation of prior information,
             such as sparsity in the regression coefficients. By placing
             a gamma distribution prior on the NB dispersion parameter r,
             and connecting a log-normal distribution prior with the
             logit of the NB probability parameter p, efficient Gibbs
             sampling and variational Bayes inference are both developed.
             The closed-form updates are obtained by exploiting
             conditional conjugacy via both a compound Poisson
             representation and a Polya-Gamma distribution based data
             augmentation approach. The proposed Bayesian inference can
             be implemented routinely, while being easily generalizable
             to more complex settings involving multivariate dependence
             structures. The algorithms are illustrated using real
             examples. Copyright 2012 by the author(s)/owner(s).},
   Key = {fds258027}
}
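
The generative side of the lognormal and gamma mixed negative binomial model is straightforward to sketch using the gamma-Poisson (compound) representation of the NB distribution mentioned in the abstract. The hyperparameter values and the true coefficient vector below are illustrative assumptions.

import numpy as np

rng = np.random.default_rng(5)
n, p = 1000, 3
X = rng.normal(size=(n, p))
beta = np.array([1.0, -0.5, 0.0])          # sparse regression coefficients

r = rng.gamma(2.0, 1.0)                    # NB dispersion with a gamma prior
eps = rng.normal(0.0, 0.3, n)              # lognormal random effect on the odds
logit_p = X @ beta + eps                   # logit of the NB probability parameter
prob = 1 / (1 + np.exp(-logit_p))

# NB(r, p) as a gamma mixture of Poissons: lambda ~ Ga(r, p/(1-p)), y ~ Po(lambda)
lam = rng.gamma(r, prob / (1 - prob))
y = rng.poisson(lam)
print("mean:", y.mean().round(2), "variance:", y.var().round(2))  # overdispersed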

@article{fds257875,
   Author = {Petralia, F and Rao, V and Dunson, DB},
   Title = {Repulsive mixtures},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {3},
   Pages = {1889-1897},
   Year = {2012},
   Month = {December},
   ISSN = {1049-5258},
   Abstract = {Discrete mixtures are used routinely in broad sweeping
             applications ranging from unsupervised settings to fully
             supervised multi-task learning. Indeed, finite mixtures and
             infinite mixtures, relying on Dirichlet processes and
             modifications, have become a standard tool. One important
             issue that arises in using discrete mixtures is low
             separation in the components; in particular, different
             components can be introduced that are very similar and hence
             redundant. Such redundancy leads to too many clusters that
             are too similar, degrading performance in unsupervised
             learning and leading to computational problems and an
             unnecessarily complex model in supervised settings.
             Redundancy can arise in the absence of a penalty on
             components placed close together even when a Bayesian
             approach is used to learn the number of components. To solve
             this problem, we propose a novel prior that generates
             components from a repulsive process, automatically
             penalizing redundant components. We characterize this
             repulsive prior theoretically and propose a Markov chain
             Monte Carlo sampling algorithm for posterior computation.
             The methods are illustrated using synthetic examples and an
             iris data set.},
   Key = {fds257875}
}
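
To see what a repulsive prior on component locations does, the sketch below tilts an i.i.d. Gaussian base prior by a penalty that vanishes as two means coincide, and samples from it with random-walk Metropolis. The specific repulsion function g(d) = 1 - exp(-tau d^2) and the sampler settings are illustrative assumptions, not the paper's exact construction.

import numpy as np

rng = np.random.default_rng(6)
K, tau = 3, 4.0

def log_prior(mu):
    base = -0.5 * np.sum(mu**2)                 # N(0,1) base prior on each mean
    d = np.abs(mu[:, None] - mu[None, :])       # pairwise distances
    pairs = d[np.triu_indices(K, 1)]
    return base + np.sum(np.log(1 - np.exp(-tau * pairs**2)))  # repulsion term

mu = rng.normal(size=K)
gaps = []
for _ in range(20_000):                         # random-walk Metropolis
    prop = mu + 0.3 * rng.normal(size=K)
    if np.log(rng.random()) < log_prior(prop) - log_prior(mu):
        mu = prop
    gaps.append(np.min(np.abs(np.diff(np.sort(mu)))))
print("avg minimum gap between means:", np.mean(gaps).round(2))

Under the plain Gaussian base prior the minimum gap can be arbitrarily small; the repulsion term keeps the sampled means separated, which is the anti-redundancy behavior described above.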

@article{fds257876,
   Author = {Fox, EB and Dunson, DB},
   Title = {Multiresolution Gaussian processes},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {1},
   Pages = {737-745},
   Year = {2012},
   Month = {December},
   ISSN = {1049-5258},
   Abstract = {We propose a multiresolution Gaussian process to capture
             long-range, non-Markovian dependencies while allowing for
             abrupt changes and non-stationarity. The multiresolution GP
             hierarchically couples a collection of smooth GPs, each
             defined over an element of a random nested partition.
             Long-range dependencies are captured by the top-level GP
             while the partition points define the abrupt changes. Due to
             the inherent conjugacy of the GPs, one can analytically
             marginalize the GPs and compute the marginal likelihood of
             the observations given the partition tree. This property
             allows for efficient inference of the partition itself, for
             which we employ graph-theoretic techniques. We apply the
             multiresolution GP to the analysis of magnetoencephalography
             (MEG) recordings of brain activity.},
   Key = {fds257876}
}

@article{fds257878,
   Author = {Ding, M and He, L and Dunson, D and Carin, L},
   Title = {Nonparametric Bayesian Segmentation of a Multivariate
             Inhomogeneous Space-Time Poisson Process.},
   Journal = {Bayesian Analysis},
   Volume = {7},
   Number = {4},
   Pages = {813-840},
   Year = {2012},
   Month = {December},
   ISSN = {1931-6690},
   url = {http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcApp=PARTNER_APP&SrcAuth=LinksAMR&KeyUT=WOS:000311975100005&DestLinkType=FullRecord&DestApp=ALL_WOS&UsrCustomerID=47d3190e77e5a3a53558812f597b0b92},
   Abstract = {A nonparametric Bayesian model is proposed for segmenting
             time-evolving multivariate spatial point process data. An
             inhomogeneous Poisson process is assumed, with a logistic
             stick-breaking process (LSBP) used to encourage
             piecewise-constant spatial Poisson intensities. The LSBP
             explicitly favors spatially contiguous segments, and infers
             the number of segments based on the observed data. The
             temporal dynamics of the segmentation and of the Poisson
             intensities are modeled with exponential correlation in
             time, implemented in the form of a first-order
             autoregressive model for uniformly sampled discrete data,
             and via a Gaussian process with an exponential kernel for
             general temporal sampling. We consider and compare two
             different inference techniques: a Markov chain Monte Carlo
             sampler, which has relatively high computational complexity;
             and an approximate and efficient variational Bayesian
             analysis. The model is demonstrated with a simulated example
             and a real example of space-time crime events in Cincinnati,
             Ohio, USA.},
   Doi = {10.1214/12-BA727},
   Key = {fds257878}
}
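
The logistic stick-breaking construction behind this model can be visualized on a grid: each segment gets a spatial logistic function, and stick-breaking converts these into location-dependent segment probabilities that favor contiguous regions, with a constant Poisson intensity per segment. The radial form of the logistic functions and all parameter values below are illustrative assumptions.

import numpy as np

rng = np.random.default_rng(7)
K, G = 5, 50
xs, ys = np.meshgrid(np.linspace(0, 1, G), np.linspace(0, 1, G))
grid = np.column_stack([xs.ravel(), ys.ravel()])      # G*G spatial locations

centers = rng.uniform(0, 1, (K, 2))
gamma, tau = 2.0, 30.0
# f_k(x) decays with distance from the segment's center.
f = gamma - tau * ((grid[:, None, :] - centers[None, :, :])**2).sum(-1)
sig = 1 / (1 + np.exp(-f))                            # sigmoid(f_k(x))

pi = np.empty_like(sig)                               # stick-breaking in space
rest = np.ones(G * G)
for k in range(K):
    pi[:, k] = sig[:, k] * rest
    rest = rest * (1 - sig[:, k])

seg = np.argmax(pi, axis=1).reshape(G, G)             # hard segmentation
rates = rng.gamma(2.0, 2.0, K)                        # per-segment Poisson intensity
counts = rng.poisson(rates[seg])                      # piecewise-constant intensity field
print("segment sizes:", np.bincount(seg.ravel(), minlength=K))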

@article{fds257881,
   Author = {Love, C and Sun, Z and Jima, D and Li, G and Zhang, J and Miles, R and Richards, KL and Dunphy, CH and Choi, WWL and Srivastava, G and Lugar,
             PL and Rizzieri, DA and Lagoo, AS and Bernal-Mizrachi, L and Mann, KP and Flowers, CR and Naresh, KN and Evens, AM and Chadburn, A and Gordon, LI and Czader, MB and Gill, JI and Hsi, ED and Greenough, A and Moffitt, AB and McKinney, M and Banerjee, A and Grubor, V and Levy, S and Dunson, DB and Dave, SS},
   Title = {The genetic landscape of mutations in Burkitt
             lymphoma.},
   Journal = {Nat Genet},
   Volume = {44},
   Number = {12},
   Pages = {1321-1325},
   Year = {2012},
   Month = {December},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23143597},
   Abstract = {Burkitt lymphoma is characterized by deregulation of MYC,
             but the contribution of other genetic mutations to the
             disease is largely unknown. Here, we describe the first
             completely sequenced genome from a Burkitt lymphoma tumor
             and germline DNA from the same affected individual. We
             further sequenced the exomes of 59 Burkitt lymphoma tumors
             and compared them to sequenced exomes from 94 diffuse large
             B-cell lymphoma (DLBCL) tumors. We identified 70 genes that
             were recurrently mutated in Burkitt lymphomas, including
             ID3, GNA13, RET, PIK3R1 and the SWI/SNF genes ARID1A and
             SMARCA4. Our data implicate a number of genes in cancer for
             the first time, including CCT6B, SALL3, FTCD and PC. ID3
             mutations occurred in 34% of Burkitt lymphomas and not in
             DLBCLs. We show experimentally that ID3 mutations promote
             cell cycle progression and proliferation. Our work thus
             elucidates commonly occurring gene-coding mutations in
             Burkitt lymphoma and implicates ID3 as a new tumor
             suppressor gene.},
   Doi = {10.1038/ng.2468},
   Key = {fds257881}
}

@article{fds257976,
   Author = {Montagna, S and Tokdar, ST and Neelon, B and Dunson,
             DB},
   Title = {Bayesian latent factor regression for functional and
             longitudinal data.},
   Journal = {Biometrics},
   Volume = {68},
   Number = {4},
   Pages = {1064-1073},
   Year = {2012},
   Month = {December},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23005895},
   Abstract = {In studies involving functional data, it is commonly of
             interest to model the impact of predictors on the
             distribution of the curves, allowing flexible effects on not
             only the mean curve but also the distribution about the
             mean. Characterizing the curve for each subject as a linear
             combination of a high-dimensional set of potential basis
             functions, we place a sparse latent factor regression model
             on the basis coefficients. We induce basis selection by
             choosing a shrinkage prior that allows many of the loadings
             to be close to zero. The number of latent factors is treated
             as unknown through a highly-efficient, adaptive-blocked
             Gibbs sampler. Predictors are included on the latent
             variables level, while allowing different predictors to
             impact different latent factors. This model induces a
             framework for functional response regression in which the
             distribution of the curves is allowed to change flexibly
             with predictors. The performance is assessed through
             simulation studies and the methods are applied to data on
             blood pressure trajectories during pregnancy.},
   Doi = {10.1111/j.1541-0420.2012.01788.x},
   Key = {fds257976}
}

@article{fds258004,
   Author = {Zhu, B and Dunson, DB and Ashley-Koch, AE},
   Title = {Adverse subpopulation regression for multivariate outcomes
             with high-dimensional predictors.},
   Journal = {Stat Med},
   Volume = {31},
   Number = {29},
   Pages = {4102-4113},
   Year = {2012},
   Month = {December},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/22825854},
   Abstract = {Biomedical studies have a common interest in assessing
             relationships between multiple related health outcomes and
             high-dimensional predictors. For example, in reproductive
             epidemiology, one may collect pregnancy outcomes such as
             length of gestation and birth weight and predictors such as
             single nucleotide polymorphisms in multiple candidate genes
             and environmental exposures. In such settings, there is a
             need for simple yet flexible methods for selecting true
             predictors of adverse health responses from a
             high-dimensional set of candidate predictors. To address
             this problem, one may either consider linear regression
             models for the continuous outcomes or convert these outcomes
             into binary indicators of adverse responses using predefined
             cutoffs. The former strategy has the disadvantage of often
             leading to a poorly fitting model that does not predict risk
             well, whereas the latter approach can be very sensitive to
             the cutoff choice. As a simple yet flexible alternative, we
             propose a method for adverse subpopulation regression, which
             relies on a two-component latent class model, with the
             dominant component corresponding to (presumed) healthy
             individuals and the risk of falling in the minority
             component characterized via a logistic regression. The
             logistic regression model is designed to accommodate
             high-dimensional predictors, as occur in studies with a
             large number of gene by environment interactions, through
             the use of a flexible nonparametric multiple shrinkage
             approach. The Gibbs sampler is developed for posterior
             computation. We evaluate the methods with the use of
             simulation studies and apply these to a genetic epidemiology
             study of pregnancy outcomes.},
   Doi = {10.1002/sim.5520},
   Key = {fds258004}
}

@article{fds257854,
   Author = {Xing, Z and Nicholson, B and Jimenez, M and Veldman, T and Hudson, L and Lucas, J and Dunson, D and Zaas, AK and Woods, CW and Ginsburg, GS and Carin, L},
   Title = {Bayesian modeling of temporal properties of infectious
             disease in a college student population},
   Journal = {Journal of Applied Statistics},
   Volume = {41},
   Number = {6},
   Pages = {1358-1382},
   Publisher = {Informa UK Limited},
   Year = {2013},
   ISSN = {0266-4763},
   url = {http://dx.doi.org/10.1080/02664763.2013.870138},
   Doi = {10.1080/02664763.2013.870138},
   Key = {fds257854}
}

@article{fds257862,
   Author = {Li, D and Longnecker, MP and Dunson, DB},
   Title = {Lipid adjustment for chemical exposures: Accounting for
             concomitant variables},
   Journal = {Epidemiology (Cambridge, Mass.)},
   Volume = {24},
   Number = {6},
   Pages = {921-928},
   Year = {2013},
   ISSN = {1044-3983},
   url = {http://dx.doi.org/10.1097/EDE.0b013e3182a671e4},
   Abstract = {BACKGROUND: Some environmental chemical exposures are
             lipophilic and need to be adjusted by serum lipid levels
             before data analyses. There are currently various strategies
             that attempt to account for this problem, but all have their
             drawbacks. To address such concerns, we propose a new method
             that uses Box-Cox transformations and a simple Bayesian
             hierarchical model to adjust for lipophilic chemical
             exposures. METHODS: We compared our Box-Cox method to
             existing methods. We ran simulation studies in which
             increasing levels of lipid-adjusted chemical exposure did
             and did not increase the odds of having a disease, and we
             looked at both single-exposure and multiple-exposure cases.
             We also analyzed an epidemiology dataset that examined the
             effects of various chemical exposure on the risk of birth
             defects. RESULTS: Compared with existing methods, our
             Box-Cox method produced unbiased estimates, good coverage,
             similar power, and lower type I error rates. This was the
             case in both single- and multiple-exposure simulation
             studies. Results from analysis of the birth-defect data
             differed from results using existing methods. CONCLUSION:
             Our Box-Cox method is a novel and intuitive way to account
             for the lipophilic nature of certain chemical exposures. It
             addresses some of the problems with existing methods, is
             easily extendable to multiple exposures, and can be used in
             any analysis that involves concomitant variables. Copyright
             © 2013 by Lippincott Williams & Wilkins.},
   Doi = {10.1097/EDE.0b013e3182a671e4},
   Key = {fds257862}
}
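
A rough illustration of the Box-Cox step described in the abstract above; the
data are simulated for illustration, and the simple residual adjustment stands
in for the paper's Bayesian hierarchical model:

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
# Hypothetical data: a lipophilic exposure that scales with serum lipids.
lipids = rng.lognormal(mean=0.5, sigma=0.3, size=200)
exposure = rng.lognormal(mean=0.0, sigma=0.5, size=200) * lipids

# Box-Cox transform both variables (lambda chosen by maximum likelihood),
# then take residuals of exposure on lipids as the adjusted exposure --
# a crude point-estimate stand-in for the Bayesian hierarchical model.
exp_bc, lam_e = stats.boxcox(exposure)
lip_bc, lam_l = stats.boxcox(lipids)
slope, intercept = np.polyfit(lip_bc, exp_bc, 1)
adjusted = exp_bc - (intercept + slope * lip_bc)
print(round(lam_e, 2), round(lam_l, 2), adjusted[:3])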

@article{fds304008,
   Author = {Banerjee, A and Dunson, DB and Tokdar, ST},
   Title = {Efficient Gaussian process regression for large
             datasets},
   Journal = {Biometrika},
   Volume = {100},
   Number = {1},
   Pages = {75-89},
   Year = {2013},
   url = {http://arxiv.org/abs/1106.5779v1},
   Abstract = {Gaussian processes are widely used in nonparametric
             regression, classification and spatiotemporal modelling,
             facilitated in part by a rich literature on their
             theoretical properties. However, one of their practical
             limitations is expensive computation, typically on the order
             of n^3, where n is the number of data points, in performing
             the necessary matrix inversions. For large datasets, storage
             and processing also lead to computational bottlenecks, and
             numerical stability of the estimates and predicted values
             degrades with increasing n. Various methods have been
             proposed to address these problems, including predictive
             processes in spatial data analysis and the
             subset-of-regressors technique in machine learning. The idea
             underlying these approaches is to use a subset of the data,
             but this raises questions concerning sensitivity to the
             choice of subset and limitations in estimating fine-scale
             structure in regions that are not well covered by the
             subset. Motivated by the literature on compressive sensing,
             we propose an alternative approach that involves linear
             projection of all the data points onto a lower-dimensional
             subspace. We demonstrate the superiority of this approach
             from a theoretical perspective and through simulated and
             real data examples. © 2012 Biometrika Trust.},
   Doi = {10.1093/biomet/ass068},
   Key = {fds304008}
}
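
The projection idea in the abstract above can be sketched as follows. This is
one natural way to write a compressed Gaussian process predictor under a
squared-exponential kernel and a Gaussian random projection, on toy data; it
illustrates the general idea only and is not necessarily the authors' exact
estimator:

import numpy as np

rng = np.random.default_rng(1)
n, m = 2000, 100                     # n observations, m-dimensional projection

x = np.sort(rng.uniform(0, 10, n))   # toy one-dimensional inputs
y = np.sin(x) + 0.3 * rng.standard_normal(n)

def kern(a, b, ls=1.0):
    # Squared-exponential covariance.
    return np.exp(-0.5 * ((a[:, None] - b[None, :]) / ls) ** 2)

K = kern(x, x)
sig2 = 0.09
Phi = rng.standard_normal((m, n)) / np.sqrt(n)   # random projection

# Conditioning on Phi @ y instead of y replaces the n x n inverse with an
# m x m solve: E[f(x*) | Phi y] = k* Phi' (Phi K Phi' + sig2 Phi Phi')^{-1} Phi y.
A = Phi @ K @ Phi.T + sig2 * (Phi @ Phi.T)
xs = np.linspace(0, 10, 5)
post_mean = kern(xs, x) @ Phi.T @ np.linalg.solve(A, Phi @ y)
print(np.round(post_mean, 2), np.round(np.sin(xs), 2))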

@article{fds257847,
   Author = {Petralia, F and Vogelstein, J and Dunson, DB},
   Title = {Multiscale dictionary learning for estimating conditional
             distributions},
   Journal = {Advances in Neural Information Processing
             Systems},
   Year = {2013},
   Month = {January},
   ISSN = {1049-5258},
   Abstract = {Nonparametric estimation of the conditional distribution of
             a response given high-dimensional features is a challenging
             problem. It is important to allow not only the mean but also
             the variance and shape of the response density to change
             flexibly with features, which are massive-dimensional. We
             propose a multiscale dictionary learning model, which
             expresses the conditional response density as a convex
             combination of dictionary densities, with the densities used
             and their weights dependent on the path through a tree
             decomposition of the feature space. A fast graph
             partitioning algorithm is applied to obtain the tree
             decomposition, with Bayesian methods then used to adaptively
             prune and average over different sub-trees in a soft
             probabilistic manner. The algorithm scales efficiently to
             approximately one million features. State-of-the-art
             predictive performance is demonstrated for toy examples and
             two neuroscience applications including up to a million
             features.},
   Key = {fds257847}
}

@article{fds257848,
   Author = {Durante, D and Scarpa, B and Dunson, DB},
   Title = {Locally adaptive Bayesian multivariate time
             series},
   Journal = {Advances in Neural Information Processing
             Systems},
   Year = {2013},
   Month = {January},
   ISSN = {1049-5258},
   Abstract = {In modeling multivariate time series, it is important to
             allow time-varying smoothness in the mean and covariance
             process. In particular, there may be certain time intervals
             exhibiting rapid changes and others in which changes are
             slow. If such locally adaptive smoothness is not accounted
             for, one can obtain misleading inferences and predictions,
             with over-smoothing across erratic time intervals and
             under-smoothing across times exhibiting slow variation. This
             can lead to miscalibration of predictive intervals, which
             can be substantially too narrow or wide depending on the
             time. We propose a continuous multivariate stochastic
             process for time series having locally varying smoothness in
             both the mean and covariance matrix. This process is
             constructed utilizing latent dictionary functions in time,
             which are given nested Gaussian process priors and linearly
             related to the observed data through a sparse mapping. Using
             a differential equation representation, we bypass usual
             computational bottlenecks in obtaining MCMC and online
             algorithms for approximate Bayesian inference. The
             performance is assessed in simulations and illustrated in a
             financial application.},
   Key = {fds257848}
}

@article{fds257855,
   Author = {Zhu, B and Dunson, DB},
   Title = {Locally Adaptive Bayes Nonparametric Regression via Nested
             Gaussian Processes.},
   Journal = {Journal of the American Statistical Association},
   Volume = {108},
   Number = {504},
   Year = {2013},
   Month = {January},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2013.838568},
   Abstract = {We propose a nested Gaussian process (nGP) as a locally
             adaptive prior for Bayesian nonparametric regression.
             Specified through a set of stochastic differential equations
             (SDEs), the nGP imposes a Gaussian process prior for the
             function's mth-order derivative. The nesting comes in
             through including a local instantaneous mean function, which
             is drawn from another Gaussian process inducing adaptivity
             to locally-varying smoothness. We discuss the support of the
             nGP prior in terms of the closure of a reproducing kernel
             Hilbert space, and consider theoretical properties of the
             posterior. The posterior mean under the nGP prior is shown
             to be equivalent to the minimizer of a nested penalized
             sum-of-squares involving penalties for both the global and
             local roughness of the function. Using highly-efficient
             Markov chain Monte Carlo for posterior inference, the
             proposed method performs well in simulation studies compared
             to several alternatives, and is scalable to massive data,
             illustrated through a proteomics application.},
   Doi = {10.1080/01621459.2013.838568},
   Key = {fds257855}
}

@article{fds257856,
   Author = {Kunihama, T and Dunson, DB},
   Title = {Bayesian modeling of temporal dependence in large sparse
             contingency tables.},
   Journal = {Journal of the American Statistical Association},
   Volume = {108},
   Number = {504},
   Pages = {1324-1338},
   Year = {2013},
   Month = {January},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2013.823866},
   Abstract = {In many applications, it is of interest to study trends over
             time in relationships among categorical variables, such as
             age group, ethnicity, religious affiliation, political party
             and preference for particular policies. At each time point,
             a sample of individuals provide responses to a set of
             questions, with different individuals sampled at each time.
             In such settings, there tends to be abundant missing data
             and the variables being measured may change over time. At
             each time point, one obtains a large sparse contingency
             table, with the number of cells often much larger than the
             number of individuals being surveyed. To borrow information
             across time in modeling large sparse contingency tables, we
             propose a Bayesian autoregressive tensor factorization
             approach. The proposed model relies on a probabilistic
             Parafac factorization of the joint pmf characterizing the
             categorical data distribution at each time point, with
             autocorrelation included across times. Efficient
             computational methods are developed relying on MCMC. The
             methods are evaluated through simulation examples and
             applied to social survey data.},
   Doi = {10.1080/01621459.2013.823866},
   Key = {fds257856}
}

@article{fds257864,
   Author = {Armagan, A and Dunson, DB and Lee, J},
   Title = {Generalized double Pareto shrinkage.},
   Journal = {Statistica Sinica},
   Volume = {23},
   Number = {1},
   Pages = {119-143},
   Year = {2013},
   Month = {January},
   ISSN = {1017-0405},
   url = {http://dx.doi.org/10.5705/ss.2011.048},
   Abstract = {We propose a generalized double Pareto prior for Bayesian
             shrinkage estimation and inferences in linear models. The
             prior can be obtained via a scale mixture of Laplace or
             normal distributions, forming a bridge between the Laplace
             and Normal-Jeffreys' priors. While it has a spike at zero
             like the Laplace density, it also has a Student's t-like
             tail behavior. Bayesian computation is straightforward via a
             simple Gibbs sampling algorithm. We investigate the
             properties of the maximum a posteriori estimator, as sparse
             estimation plays an important role in many problems, reveal
             connections with some well-established regularization
             procedures, and show some asymptotic results. The
             performance of the prior is tested through simulations and
             an application.},
   Doi = {10.5705/ss.2011.048},
   Key = {fds257864}
}
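
A sketch of sampling from the generalized double Pareto prior via the
scale-mixture representation mentioned in the abstract; the
normal-exponential-gamma hierarchy below is the standard construction, though
the exact parameterization should be checked against the paper:

import numpy as np

rng = np.random.default_rng(2)

def rgdp(size, alpha=1.0, eta=1.0):
    # lambda ~ Gamma(alpha, rate=eta); tau | lambda ~ Exp(rate=lambda^2 / 2);
    # beta | tau ~ N(0, tau).  Marginally, beta has a generalized double
    # Pareto distribution: Laplace-like spike at zero, Student-t-like tails.
    lam = rng.gamma(shape=alpha, scale=1.0 / eta, size=size)
    tau = rng.exponential(scale=2.0 / lam ** 2)
    return rng.normal(0.0, np.sqrt(tau))

draws = rgdp(100_000)
print(np.mean(np.abs(draws) < 0.1), np.quantile(np.abs(draws), 0.99))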

@article{fds257870,
   Author = {Chen, B and Polatkan, G and Sapiro, G and Blei, D and Dunson, D and Carin,
             L},
   Title = {Deep Learning with Hierarchical Convolutional Factor
             Analysis.},
   Journal = {IEEE Transactions on Pattern Analysis and Machine
             Intelligence},
   Year = {2013},
   Month = {January},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23319498},
   Abstract = {Unsupervised multi-layered ("deep") models are considered
             for general data, with a particular focus on imagery. The
             model is represented using a hierarchical convolutional
             factor-analysis construction, with sparse factor loadings
             and scores. The computation of layer-dependent model
             parameters is implemented within a Bayesian setting,
             employing a Gibbs sampler and variational Bayesian (VB)
             analysis, that explicitly exploit the convolutional nature
             of the expansion. In order to address large-scale and
             streaming data, an online version of VB is also developed.
             The number of basis functions or dictionary elements at each
             layer is inferred from the data, based on a beta-Bernoulli
             implementation of the Indian buffet process. Example results
             are presented for several image-processing applications,
             with comparisons to related models in the
             literature.},
   Key = {fds257870}
}

@article{fds322560,
   Author = {Johndrow, JE and Lum, K and Dunson, DB},
   Title = {Diagonal orthant multinomial probit models},
   Journal = {Journal of Machine Learning Research},
   Volume = {31},
   Pages = {29-38},
   Year = {2013},
   Month = {January},
   Abstract = {Copyright 2013 by the authors. Bayesian classification
             commonly relies on probit models, with data augmentation
             algorithms used for posterior computation. By imputing
             latent Gaussian variables, one can often trivially adapt
             computational approaches used in Gaussian models. However,
             MCMC for multinomial probit (MNP) models can be inefficient
             in practice due to high posterior dependence between latent
             variables and parameters, and to difficulties in efficiently
             sampling latent variables when there are more than two
             categories. To address these problems, we propose a new
             class of diagonal orthant (DO) multinomial models. The key
             characteristics of these models include conditional
             independence of the latent variables given model parameters,
             avoidance of arbitrary identifiability restrictions, and
             simple expressions for category probabilities. We show
             substantially improved computational efficiency and
             comparable predictive performance to MNP.},
   Key = {fds322560}
}

@article{fds322561,
   Author = {Banerjee, A and Murray, J and Dunson, DB},
   Title = {Bayesian learning of joint distributions of
             objects},
   Journal = {Journal of Machine Learning Research},
   Volume = {31},
   Pages = {1-9},
   Year = {2013},
   Month = {January},
   Abstract = {Copyright 2013 by the authors. There is increasing interest
             in broad application areas in defining flexible joint models
             for data having a variety of measurement scales, while also
             allowing data of complex types, such as functions, images
             and documents. We consider a general framework for
             nonparametric Bayes joint modeling through mixture models
             that incorporate dependence across data types through a
             joint mixing measure. The mixing measure is assigned a novel
             infinite tensor factorization (ITF) prior that allows
             flexible dependence in cluster allocation across data types.
             The ITF prior is formulated as a tensor product of
             stick-breaking processes. Focusing on a convenient special
             case corresponding to a Parafac factorization, we provide
             basic theory justifying the flexibility of the proposed
             prior and resulting asymptotic properties. Focusing on ITF
             mixtures of product kernels, we develop a new Gibbs sampling
             algorithm for routine implementation relying on slice
             sampling. The methods are compared with alternative joint
             mixture models based on Dirichlet processes and related
             approaches through simulations and real data
             applications.},
   Key = {fds322561}
}

@article{fds257880,
   Author = {Zhang, J and Grubor, V and Love, CL and Banerjee, A and Richards, KL and Mieczkowski, PA and Dunphy, C and Choi, W and Au, WY and Srivastava, G and Lugar, PL and Rizzieri, DA and Lagoo, AS and Bernal-Mizrachi, L and Mann, KP and Flowers, C and Naresh, K and Evens, A and Gordon, LI and Czader, M and Gill, JI and Hsi, ED and Liu, Q and Fan, A and Walsh, K and Jima, D and Smith, LL and Johnson, AJ and Byrd, JC and Luftig, MA and Ni,
             T and Zhu, J and Chadburn, A and Levy, S and Dunson, D and Dave,
             SS},
   Title = {Genetic heterogeneity of diffuse large B-cell
             lymphoma.},
   Journal = {Proc Natl Acad Sci U S A},
   Volume = {110},
   Number = {4},
   Pages = {1398-1403},
   Year = {2013},
   Month = {January},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23292937},
   Abstract = {Diffuse large B-cell lymphoma (DLBCL) is the most common
             form of lymphoma in adults. The disease exhibits a striking
             heterogeneity in gene expression profiles and clinical
             outcomes, but its genetic causes remain to be fully defined.
             Through whole genome and exome sequencing, we characterized
             the genetic diversity of DLBCL. In all, we sequenced 73
             DLBCL primary tumors (34 with matched normal DNA).
             Separately, we sequenced the exomes of 21 DLBCL cell lines.
             We identified 322 DLBCL cancer genes that were recurrently
             mutated in primary DLBCLs. We identified recurrent mutations
             implicating a number of known and not previously identified
             genes and pathways in DLBCL including those related to
             chromatin modification (ARID1A and MEF2B), NF-κB (CARD11
             and TNFAIP3), PI3 kinase (PIK3CD, PIK3R1, and MTOR), B-cell
             lineage (IRF8, POU2F2, and GNA13), and WNT signaling (WIF1).
             We also experimentally validated a mutation in PIK3CD, a
             gene not previously implicated in lymphomas. The patterns of
             mutation demonstrated a classic long tail distribution with
             substantial variation of mutated genes from patient to
             patient and also between published studies. Thus, our study
             reveals the tremendous genetic heterogeneity that underlies
             lymphomas and highlights the need for personalized medicine
             approaches to treating these patients.},
   Doi = {10.1073/pnas.1205299110},
   Key = {fds257880}
}

@article{fds257879,
   Author = {Wang, E and Salazar, E and Dunson, D and Carin, L},
   Title = {Spatio-temporal modeling of legislation and
             votes},
   Journal = {Bayesian Analysis},
   Volume = {8},
   Number = {1},
   Pages = {233-268},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2013},
   Month = {March},
   ISSN = {1936-0975},
   url = {http://dx.doi.org/10.1214/13-BA810},
   Abstract = {A model is presented for analysis of multivariate binary
             data with spatio-temporal dependencies, and applied to
             congressional roll call data from the United States House of
             Representatives and Senate. The model considers each
             legislator's constituency (location), the congressional
             session (time) of each vote, and the details (text) of each
             piece of legislation. The model can predict votes of new
             legislation from only text, while imposing smooth temporal
             evolution of legislator latent features, and correlation of
             legislators with adjacent constituencies. Additionally, the
             model estimates the number of latent dimensions required to
             represent the data. A Gibbs sampler is developed for
             posterior inference. The model is demonstrated as an
             exploratory tool of legislation and it performs well in
             quantitative comparisons to a traditional ideal-point model.
             © 2013 International Society for Bayesian
             Analysis.},
   Doi = {10.1214/13-BA810},
   Key = {fds257879}
}

@article{fds257877,
   Author = {Pati, D and Dunson, DB and Tokdar, ST},
   Title = {Posterior consistency in conditional distribution
             estimation.},
   Journal = {Journal of Multivariate Analysis},
   Volume = {116},
   Pages = {456-472},
   Year = {2013},
   Month = {April},
   ISSN = {0047-259X},
   url = {http://dx.doi.org/10.1016/j.jmva.2013.01.011},
   Abstract = {A wide variety of priors have been proposed for
             nonparametric Bayesian estimation of conditional
             distributions, and there is a clear need for theorems
             providing conditions on the prior for large support, as well
             as posterior consistency. Estimation of an uncountable
             collection of conditional distributions across different
             regions of the predictor space is a challenging problem,
             which differs in some important ways from density and mean
             regression estimation problems. Defining various topologies
             on the space of conditional distributions, we provide
             sufficient conditions for posterior consistency focusing on
             a broad class of priors formulated as predictor-dependent
             mixtures of Gaussian kernels. This theory is illustrated by
             showing that the conditions are satisfied for a class of
             generalized stick-breaking process mixtures in which the
             stick-breaking lengths are monotone, differentiable
             functions of a continuous stochastic process. We also
             provide a set of sufficient conditions for the case where
             stick-breaking lengths are predictor independent, such as
             those arising from a fixed Dirichlet process
             prior.},
   Doi = {10.1016/j.jmva.2013.01.011},
   Key = {fds257877}
}

@article{fds257873,
   Author = {Page, G and Bhattacharya, A and Dunson, D},
   Title = {Classification via Bayesian nonparametric learning of affine
             subspaces},
   Journal = {Journal of the American Statistical Association},
   Volume = {108},
   Number = {501},
   Pages = {187-201},
   Publisher = {Informa UK Limited},
   Year = {2013},
   Month = {May},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2013.763566},
   Abstract = {It has become common for datasets to contain large numbers
             of variables in studies conducted in areas such as genetics,
             machine vision, image analysis, and many others. When
             analyzing such data, parametric models are often too
             inflexible while nonparametric procedures tend to be
             nonrobust because of insufficient data on these
             high-dimensional spaces. This is particularly true when
             interest lies in building efficient classifiers in the
             presence of many predictor variables. When dealing with
             these types of data, it is often the case that most of the
             variability tends to lie along a few directions, or more
             generally along a much smaller dimensional submanifold of
             the data space. In this article, we propose a class of
             models that flexibly learn about this submanifold while
             simultaneously performing dimension reduction in
             classification. This methodology allows the cell
             probabilities to vary nonparametrically based on a few
             coordinates expressed as linear combinations of the
             predictors. Also, as opposed to many black-box methods for
             dimensionality reduction, the proposed model is appealing in
             having clearly interpretable and identifiable parameters
             that provide insight into which predictors are important in
             determining accurate classification boundaries. Gibbs
             sampling methods are developed for posterior computation,
             and the methods are illustrated using simulated and real
             data applications. © 2013 American Statistical
             Association.},
   Doi = {10.1080/01621459.2013.763566},
   Key = {fds257873}
}

@article{fds304007,
   Author = {Murray, JS and Dunson, DB and Carin, L and Lucas,
             JE},
   Title = {Bayesian Gaussian Copula Factor Models for Mixed
             Data.},
   Journal = {Journal of the American Statistical Association},
   Volume = {108},
   Number = {502},
   Pages = {656-665},
   Year = {2013},
   Month = {June},
   url = {http://arxiv.org/abs/1111.0317v2},
   Abstract = {Gaussian factor models have proven widely useful for
             parsimoniously characterizing dependence in multivariate
             data. There is a rich literature on their extension to mixed
             categorical and continuous variables, using latent Gaussian
             variables or through generalized latent trait models
             accommodating measurements in the exponential family.
             However, when generalizing to non-Gaussian measured
             variables the latent variables typically influence both the
             dependence structure and the form of the marginal
             distributions, complicating interpretation and introducing
             artifacts. To address this problem we propose a novel class
             of Bayesian Gaussian copula factor models which decouple the
             latent factors from the marginal distributions. A
             semiparametric specification for the marginals based on the
             extended rank likelihood yields straightforward
             implementation and substantial computational gains. We
             provide new theoretical and empirical justifications for
             using this likelihood in Bayesian inference. We propose new
             default priors for the factor loadings and develop efficient
             parameter-expanded Gibbs sampling for posterior computation.
             The methods are evaluated through simulations and applied to
             a dataset in political science. The models in this paper are
             implemented in the R package bfa.},
   Doi = {10.1080/01621459.2012.762328},
   Key = {fds304007}
}
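
A rough intuition for the rank-based marginal handling mentioned in the
abstract: each variable can be mapped to the latent Gaussian scale through its
ranks. The extended rank likelihood itself treats the latent ordering as only
partially observed, so the fixed normal-scores transform below is a
simplification on simulated data:

import numpy as np
from scipy import stats

rng = np.random.default_rng(3)
# Mixed measurement scales: a skewed continuous variable and an ordinal one.
x_cont = rng.lognormal(size=500)
x_ord = rng.integers(0, 4, size=500)

def normal_scores(v):
    # Map observations to the latent Gaussian scale through their ranks.
    r = stats.rankdata(v, method='average')
    return stats.norm.ppf(r / (len(v) + 1))

z = np.column_stack([normal_scores(x_cont), normal_scores(x_ord)])
print(np.corrcoef(z, rowvar=False))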

@article{fds257871,
   Author = {Zhu, B and Ashley-Koch, AE and Dunson, DB},
   Title = {Generalized admixture mapping for complex
             traits.},
   Journal = {G3 (Bethesda, Md.)},
   Volume = {3},
   Number = {7},
   Pages = {1165-1175},
   Year = {2013},
   Month = {July},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23665878},
   Abstract = {Admixture mapping is a popular tool to identify regions of
             the genome associated with traits in a recently admixed
             population. Existing methods have been developed primarily
             for identification of a single locus influencing a
             dichotomous trait within a case-control study design. We
             propose a generalized admixture mapping (GLEAM) approach, a
             flexible and powerful regression method for both
             quantitative and qualitative traits, which is able to test
             for association between the trait and local ancestries in
             multiple loci simultaneously and adjust for covariates. The
             new method is based on the generalized linear model and uses
             a quadratic normal moment prior to incorporate admixture
             prior information. Through simulation, we demonstrate that
             GLEAM achieves lower type I error rate and higher power than
             ANCESTRYMAP both for qualitative traits and more
             significantly for quantitative traits. We applied GLEAM to
             genome-wide SNP data from the Illumina African American
             panel derived from a cohort of black women participating in
             the Healthy Pregnancy, Healthy Baby study and identified a
             locus on chromosome 2 associated with the averaged maternal
             mean arterial pressure during 24 to 28 weeks of
             pregnancy.},
   Doi = {10.1534/g3.113.006478},
   Key = {fds257871}
}

@article{fds257872,
   Author = {Yu, K and Chen, CWS and Reed, C and Dunson, DB},
   Title = {Bayesian variable selection in quantile regression},
   Journal = {Statistics and Its Interface},
   Volume = {6},
   Number = {2},
   Pages = {261-274},
   Publisher = {International Press of Boston},
   Year = {2013},
   Month = {July},
   ISSN = {1938-7989},
   url = {http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcApp=PARTNER_APP&SrcAuth=LinksAMR&KeyUT=WOS:000319964700009&DestLinkType=FullRecord&DestApp=ALL_WOS&UsrCustomerID=47d3190e77e5a3a53558812f597b0b92},
   Abstract = {In many applications, interest focuses on assessing
             relationships between predictors and the quantiles of the
             distribution of a continuous response. For example, in
             epidemiology studies, cutoffs to define premature delivery
             have been based on the 10th percentile of the distribution
             for gestational age at delivery. Using quantile regression,
             one can assess how this percentile varies with predictors
             instead of using a pre-defined cutoff. However, there is
             typically uncertainty in which of the many candidate
             predictors should be included. In order to identify
             important predictors and to build accurate predictive
             models, Bayesian methods for variable selection and model
             averaging are very useful. However, such methods are
             currently not available for quantile regression. This
             article develops Bayesian methods for variable selection,
             with a simple and efficient stochastic search variable
             selection (SSVS) algorithm proposed for posterior
             computation. This approach can be used for moderately
             high-dimensional variable selection and can accommodate
             uncertainty in basis function selection in non-linear and
             additive quantile regression models. The methods are
             illustrated using simulated data and an application to the
             Boston Housing data.},
   Doi = {10.4310/sii.2013.v6.n2.a9},
   Key = {fds257872}
}
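
As a non-Bayesian analogue of the variable selection described above, an
L1-penalized quantile regression shrinks irrelevant coefficients to zero at a
chosen percentile; the sketch below uses simulated data and scikit-learn, not
the paper's SSVS sampler:

import numpy as np
from sklearn.linear_model import QuantileRegressor

rng = np.random.default_rng(9)
n, p = 500, 10
X = rng.standard_normal((n, p))
# Only the first two predictors matter for the response.
y = 1.0 + 2.0 * X[:, 0] - 1.5 * X[:, 1] + rng.standard_normal(n)

# L1-penalized regression for the 10th percentile: the penalty zeroes out
# irrelevant coefficients, mimicking variable selection.
qr = QuantileRegressor(quantile=0.1, alpha=0.05, solver='highs').fit(X, y)
print(np.round(qr.coef_, 2))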

@article{fds257868,
   Author = {Salazar, E and Dunson, DB and Carin, L},
   Title = {Analysis of space-time relational data with application to
             legislative voting},
   Journal = {Computational Statistics & Data Analysis},
   Volume = {68},
   Pages = {141-154},
   Publisher = {Elsevier BV},
   Year = {2013},
   Month = {July},
   ISSN = {0167-9473},
   url = {http://dx.doi.org/10.1016/j.csda.2013.06.018},
   Abstract = {We consider modeling spatio-temporally indexed relational
             data, motivated by analysis of voting data for the United
             States House of Representatives over two decades. The data
             are characterized by incomplete binary matrices,
             representing votes of legislators on legislation over time.
             The spatial covariates correspond to the location of a
             legislator's district, and time corresponds to the year of a
             vote. We seek to infer latent features associated with
             legislators and legislation, incorporating spatio-temporal
             structure. A model of such data must impose a flexible
             representation of the space-time structure, since the
             apportionment of House seats and the total number of
             legislators change over time. There are 435 congressional
             districts, with one legislator at a time for each district;
             however, the total number of legislators typically changes
             from year to year, for example due to deaths. A matrix
             kernel stick-breaking process (MKSBP) is proposed, with the
             model employed within a probit-regression construction.
             Theoretical properties of the model are discussed and
             posterior inference is developed using Markov chain Monte
             Carlo methods. Advantages over benchmark models are shown in
             terms of vote prediction and treatment of missing data.
             Marked improvements in results are observed based on
             leveraging spatial (geographical) information. © 2013
             Elsevier B.V. All rights reserved.},
   Doi = {10.1016/j.csda.2013.06.018},
   Key = {fds257868}
}

@article{fds257869,
   Author = {Lock, EF and Dunson, DB},
   Title = {Bayesian consensus clustering.},
   Journal = {Bioinformatics (Oxford, England)},
   Volume = {29},
   Number = {20},
   Pages = {2610-2616},
   Year = {2013},
   Month = {October},
   url = {http://www.ncbi.nlm.nih.gov/pubmed/23990412},
   Abstract = {In biomedical research a growing number of platforms and
             technologies are used to measure diverse but related
             information, and the task of clustering a set of objects
             based on multiple sources of data arises in several
             applications. Most current approaches to multisource
             clustering either independently determine a separate
             clustering for each data source or determine a single
             'joint' clustering for all data sources. There is a need for
             more flexible approaches that simultaneously model the
             dependence and the heterogeneity of the data sources. We
             propose an integrative statistical model that permits a
             separate clustering of the objects for each data source.
             These separate clusterings adhere loosely to an overall
             consensus clustering, and hence they are not independent. We
             describe a computationally scalable Bayesian framework for
             simultaneous estimation of both the consensus clustering and
             the source-specific clusterings. We demonstrate that this
             flexible approach is more robust than joint clustering of
             all data sources, and is more powerful than clustering each
             data source independently. We present an application to
             subtype identification of breast cancer tumor samples using
             publicly available data from The Cancer Genome Atlas. R code
             with instructions and examples is available at
             http://people.duke.edu/%7Eel113/software.html.},
   Doi = {10.1093/bioinformatics/btt425},
   Key = {fds257869}
}

@article{fds257859,
   Author = {Hannah, LA and Dunson, DB},
   Title = {Multivariate Convex Regression with Adaptive
             Partitioning},
   Journal = {Journal of Machine Learning Research},
   Volume = {14},
   Pages = {3261-3294},
   Publisher = {MICROTOME PUBL},
   Year = {2013},
   Month = {November},
   ISSN = {1532-4435},
   Abstract = {We propose a new, nonparametric method for multivariate
             regression subject to convexity or concavity constraints on
             the response function. Convexity constraints are common in
             economics, statistics, operations research, financial
             engineering and optimization, but there is currently no
             multivariate method that is stable and computationally
             feasible for more than a few thousand observations. We
             introduce convex adaptive partitioning (CAP), which creates
             a globally convex regression model from locally linear
             estimates fit on adaptively selected covariate partitions.
             CAP is a computationally efficient, consistent method for
             convex regression. We demonstrate empirical performance by
             comparing the performance of CAP to other shape-constrained
             and unconstrained regression methods for predicting weekly
             wages and value function approximation for pricing American
             basket options. © 2013 Lauren A. Hannah and David B.
             Dunson.},
   Key = {fds257859}
}
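
The construction described in the abstract, taking a pointwise maximum of
locally linear fits, is automatically convex. The sketch below illustrates
this with a crude nearest-center partition in place of the paper's adaptive
splitting rule:

import numpy as np

rng = np.random.default_rng(4)
n = 1000
X = rng.uniform(-2, 2, size=(n, 2))
y = (X ** 2).sum(axis=1) + 0.1 * rng.standard_normal(n)   # convex truth

# Crude partition of the covariate space: nearest of K randomly chosen
# centers (the paper instead selects partitions adaptively).
K = 8
centers = X[rng.choice(n, K, replace=False)]
labels = np.argmin(((X[:, None, :] - centers[None]) ** 2).sum(-1), axis=1)

# Fit a least-squares hyperplane within each cell.
planes = []
for k in range(K):
    idx = labels == k
    A = np.column_stack([np.ones(idx.sum()), X[idx]])
    planes.append(np.linalg.lstsq(A, y[idx], rcond=None)[0])
planes = np.array(planes)                # rows: [intercept, slope1, slope2]

def predict(Xnew):
    # Pointwise maximum over hyperplanes gives a globally convex surface.
    return (planes[:, 0] + Xnew @ planes[:, 1:].T).max(axis=1)

print(predict(np.array([[0.0, 0.0], [1.5, -1.5]])))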

@article{fds257860,
   Author = {Armagan, A and Dunson, DB and Lee, J and Bajwa, WU and Strawn,
             N},
   Title = {Posterior consistency in linear models under shrinkage
             priors},
   Journal = {Biometrika},
   Volume = {100},
   Number = {4},
   Pages = {1011-1018},
   Publisher = {Oxford University Press (OUP)},
   Year = {2013},
   Month = {December},
   ISSN = {0006-3444},
   url = {http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcApp=PARTNER_APP&SrcAuth=LinksAMR&KeyUT=WOS:000327714200017&DestLinkType=FullRecord&DestApp=ALL_WOS&UsrCustomerID=47d3190e77e5a3a53558812f597b0b92},
   Abstract = {We investigate the asymptotic behaviour of posterior
             distributions of regression coefficients in high-dimensional
             linear models as the number of dimensions grows with the
             number of observations. We show that the posterior
             distribution concentrates in neighbourhoods of the true
             parameter under simple sufficient conditions. These
             conditions hold under popular shrinkage priors given some
             sparsity assumptions. © 2013 Biometrika
             Trust.},
   Doi = {10.1093/biomet/ast028},
   Key = {fds257860}
}

@article{fds257861,
   Author = {Canale, A and Dunson, DB},
   Title = {Nonparametric Bayes modelling of count processes},
   Journal = {Biometrika},
   Volume = {100},
   Number = {4},
   Pages = {801-816},
   Publisher = {Oxford University Press (OUP)},
   Year = {2013},
   Month = {December},
   ISSN = {0006-3444},
   url = {http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcApp=PARTNER_APP&SrcAuth=LinksAMR&KeyUT=WOS:000327714200002&DestLinkType=FullRecord&DestApp=ALL_WOS&UsrCustomerID=47d3190e77e5a3a53558812f597b0b92},
   Abstract = {Data on count processes arise in a variety of applications,
             including longitudinal, spatial and imaging studies
             measuring count responses. The literature on statistical
             models for dependent count data is dominated by models built
             from hierarchical Poisson components. The Poisson assumption
             is not warranted in many applied contexts, and hierarchical
             Poisson models make restrictive assumptions about
             overdispersion in marginal distributions. In this article we
             propose a class of nonparametric Bayes count process models,
             constructed through rounding real-valued underlying
             processes. The proposed class of models accommodates
             situations in which separate count-valued functional data
             are observed for each subject under study. Theoretical
             results on large support and posterior consistency are
             established, and computational algorithms are developed
             based on Markov chain Monte Carlo simulation. The methods
             are evaluated via simulation and illustrated by application
             to longitudinal tumour counts and to asthma inhaler usage.
             © 2013 Biometrika Trust.},
   Doi = {10.1093/biomet/ast037},
   Key = {fds257861}
}
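
A minimal sketch of the rounding construction above: simulate a real-valued
latent process and round it to non-negative integers. The floor-at-zero
rounding used here is the simplest choice; the paper allows general threshold
sequences:

import numpy as np

rng = np.random.default_rng(5)
t = np.linspace(0, 1, 200)

# Latent real-valued path from a Gaussian process (squared-exponential cov).
C = np.exp(-0.5 * ((t[:, None] - t[None, :]) / 0.1) ** 2) + 1e-8 * np.eye(t.size)
latent = rng.multivariate_normal(np.full(t.size, 1.5), C)

# Rounding maps the continuous path to a count-valued process.
counts = np.maximum(0, np.floor(latent)).astype(int)
print(counts[:20])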

@article{fds304006,
   Author = {Cornelis, B and Yang, Y and Vogelstein, JT and Dooms, A and Daubechies,
             I and Dunson, D},
   Title = {Bayesian crack detection in ultra high resolution multimodal
             images of paintings},
   Journal = {2013 18th International Conference on Digital Signal
             Processing, DSP 2013},
   Year = {2013},
   Month = {December},
   url = {http://arxiv.org/abs/1304.5894v2},
   Abstract = {The preservation of our cultural heritage is of paramount
             importance. Thanks to recent developments in digital
             acquisition techniques, powerful image analysis algorithms
             are developed which can be useful non-invasive tools to
             assist in the restoration and preservation of art. In this
             paper we propose a semi-supervised crack detection method
             that can be used for high-dimensional acquisitions of
             paintings coming from different modalities. Our dataset
             consists of a recently acquired collection of images of the
             Ghent Altarpiece (1432), one of Northern Europe's most
             important art masterpieces. Our goal is to build a
             classifier that is able to discern crack pixels from the
             background consisting of non-crack pixels, making optimal
             use of the information that is provided by each modality. To
             accomplish this we employ a recently developed
             non-parametric Bayesian classifier, that uses tensor
             factorizations to characterize any conditional probability.
             A prior is placed on the parameters of the factorization
             such that every possible interaction between predictors is
             allowed while still identifying a sparse subset among these
             predictors. The proposed Bayesian classifier, which we will
             refer to as conditional Bayesian tensor factorization or
             CBTF, is assessed by visually comparing classification
             results with the Random Forest (RF) algorithm. © 2013
             IEEE.},
   Doi = {10.1109/ICDSP.2013.6622710},
   Key = {fds304006}
}

@article{fds257857,
   Author = {Chen, CWS and Dunson, D and Frühwirth-Schnatter, S and Walker,
             SG},
   Title = {Special issue on Bayesian computing, methods and
             applications},
   Journal = {Computational Statistics & Data Analysis},
   Volume = {71},
   Pages = {273-},
   Year = {2014},
   ISSN = {0167-9473},
   url = {http://dx.doi.org/10.1016/j.csda.2013.10.011},
   Doi = {10.1016/j.csda.2013.10.011},
   Key = {fds257857}
}

@article{fds257824,
   Author = {Rai, P and Wang, Y and Guo, S and Chen, G and Dunson, D and Carin,
             L},
   Title = {Scalable Bayesian low-rank decomposition of incomplete
             multiway tensors},
   Journal = {31st International Conference on Machine Learning, ICML
             2014},
   Volume = {5},
   Pages = {3810-3820},
   Year = {2014},
   Month = {January},
   ISBN = {9781634393973},
   Abstract = {Copyright 2014 by the author(s). We present a scalable
             Bayesian framework for low-rank decomposition of multiway
             tensor data with missing observations. The key issue of
             pre-specifying the rank of the decomposition is sidestepped
             in a principled manner using a multiplicative gamma process
             prior. Both continuous and binary data can be analyzed under
             the framework, in a coherent way using fully conjugate
             Bayesian analysis. In particular, the analysis in the
             non-conjugate binary case is facilitated via the use of the
             Pólya-Gamma sampling strategy which elicits closed-form
             Gibbs sampling updates. The resulting samplers are efficient
             and enable us to apply our framework to large-scale
             problems, with time-complexity that is linear in the number
             of observed entries in the tensor. This is especially
             attractive in analyzing very large but sparsely observed
             tensors with very few known entries. Moreover, our method
             admits easy extension to the supervised setting where
             entities in one or more tensor modes have labels. Our method
             outperforms several state-of-the-art tensor decomposition
             methods on various synthetic and benchmark real-world
             datasets.},
   Key = {fds257824}
}

@article{fds257826,
   Author = {Wang, X and Peng, P and Dunson, DB},
   Title = {Median selection subset aggregation for parallel
             inference},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {3},
   Number = {January},
   Pages = {2195-2203},
   Year = {2014},
   Month = {January},
   ISSN = {1049-5258},
   Abstract = {For massive data sets, efficient computation commonly relies
             on distributed algorithms that store and process subsets of
             the data on different machines, minimizing communication
             costs. Our focus is on regression and classification
             problems involving many features. A variety of distributed
             algorithms have been proposed in this context, but
             challenges arise in defining an algorithm with low
             communication, theoretical guarantees and excellent
             practical performance in general settings. We propose a
             MEdian Selection Subset AGgregation Estimator (message)
             algorithm, which attempts to solve these problems. The
             algorithm applies feature selection in parallel for each
             subset using Lasso or another method, calculates the
             'median' feature inclusion index, estimates coefficients for
             the selected features in parallel for each subset, and then
             averages these estimates. The algorithm is simple, involves
             very minimal communication, scales efficiently in both
             sample and feature size, and has theoretical guarantees. In
             particular, we show model selection consistency and
             coefficient estimation efficiency. Extensive experiments
             show excellent performance in variable selection,
             estimation, prediction, and computation time relative to
             usual competitors.},
   Key = {fds257826}
}
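
A compact sketch of the pipeline described above, on simulated data, with
scikit-learn's LassoCV standing in for the per-subset selection step; details
such as the precise median rule are simplified:

import numpy as np
from sklearn.linear_model import LassoCV, LinearRegression

rng = np.random.default_rng(6)
n, p, m = 3000, 50, 5                  # n samples, p features, m subsets
beta = np.zeros(p)
beta[:5] = [3, -2, 1.5, -1, 2]
X = rng.standard_normal((n, p))
y = X @ beta + rng.standard_normal(n)

subsets = np.array_split(rng.permutation(n), m)

# Step 1: feature selection in parallel on each subset (Lasso here).
incl = np.array([(LassoCV(cv=3).fit(X[s], y[s]).coef_ != 0).astype(int)
                 for s in subsets])
# Step 2: 'median' inclusion index -- keep features selected on most subsets.
keep = np.median(incl, axis=0) >= 0.5

# Step 3: re-estimate coefficients on each subset with the selected
# features, then average the subset estimates.
coefs = np.array([LinearRegression().fit(X[s][:, keep], y[s]).coef_
                  for s in subsets])
print(np.flatnonzero(keep), np.round(coefs.mean(axis=0), 2))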

@article{fds257831,
   Author = {Minsker, S and Srivastava, S and Lin, L and Dunson,
             DB},
   Title = {Scalable and robust Bayesian inference via the median
             posterior},
   Journal = {31st International Conference on Machine Learning, ICML
             2014},
   Volume = {5},
   Pages = {3629-3639},
   Year = {2014},
   Month = {January},
   ISBN = {9781634393973},
   Abstract = {Copyright 2014 by the author(s). Many Bayesian learning
             methods for massive data benefit from working with small
             subsets of observations. In particular, significant progress
             has been made in scalable Bayesian learning via stochastic
             approximation. However, Bayesian learning methods in
             distributed computing environments are often problem- or
             distribution-specific and use ad hoc techniques. We propose
             a novel general approach to Bayesian inference that is
             scalable and robust to corruption in the data. Our technique
             is based on the idea of splitting the data into several
             non-overlapping subgroups, evaluating the posterior
             distribution given each independent subgroup, and then
             combining the results. Our main contribution is the proposed
             aggregation step which is based on finding the geometric
             median of subset posterior distributions. Presented
             theoretical and numerical results confirm the advantages of
             our approach.},
   Key = {fds257831}
}
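
The aggregation step above is based on a geometric median; the Weiszfeld
iteration below computes it for subset posterior summaries in Euclidean space,
whereas the paper works with distributions embedded in an RKHS:

import numpy as np

def geometric_median(points, iters=100, eps=1e-9):
    # Weiszfeld's algorithm: iteratively re-weight points by inverse distance.
    z = points.mean(axis=0)
    for _ in range(iters):
        d = np.maximum(np.linalg.norm(points - z, axis=1), eps)
        z_new = (points / d[:, None]).sum(axis=0) / (1.0 / d).sum()
        if np.linalg.norm(z_new - z) < eps:
            break
        z = z_new
    return z

rng = np.random.default_rng(7)
# Hypothetical subset posterior means; one subset is badly corrupted.
means = rng.normal(loc=[1.0, -0.5], scale=0.05, size=(10, 2))
means[0] = [25.0, 30.0]
print(geometric_median(means))   # stays near (1.0, -0.5) despite the outlier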

@article{fds257834,
   Author = {Rodriguez, A and Dunson, DB},
   Title = {Functional clustering in nested designs: Modeling
             variability in reproductive epidemiology
             studies},
   Journal = {The Annals of Applied Statistics},
   Volume = {8},
   Number = {3},
   Pages = {1416-1442},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2014},
   Month = {January},
   ISSN = {1932-6157},
   url = {http://dx.doi.org/10.1214/14-AOAS751},
   Abstract = {© Institute of Mathematical Statistics, 2014. We discuss
             functional clustering procedures for nested designs, where
             multiple curves are collected for each subject in the study.
             We start by considering the application of standard
             functional clustering tools to this problem, which leads to
             groupings based on the average profile for each subject.
             After discussing some of the shortcomings of this approach,
             we present a mixture model based on a generalization of the
             nested Dirichlet process that clusters subjects based on the
             distribution of their curves. By using mixtures of
             generalized Dirichlet processes, the model induces a much
             more flexible prior on the partition structure than other
             popular model-based clustering methods, allowing for
             different rates of introduction of new clusters as the
             number of observations increases. The methods are
             illustrated using hormone profiles from multiple menstrual
             cycles collected for women in the Early Pregnancy
             Study.},
   Doi = {10.1214/14-AOAS751},
   Key = {fds257834}
}

@article{fds257837,
   Author = {Kundu, S and Dunson, DB},
   Title = {Latent factor models for density estimation},
   Journal = {Biometrika},
   Volume = {101},
   Number = {3},
   Pages = {641-654},
   Publisher = {Oxford University Press (OUP)},
   Year = {2014},
   Month = {January},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/asu019},
   Abstract = {Although discrete mixture modelling has formed the backbone
             of the literature on Bayesian density estimation, there are
             some well-known disadvantages. As an alternative to discrete
             mixtures, we propose a class of priors based on random
             nonlinear functions of a uniform latent variable with an
             additive residual. The induced prior for the density is
             shown to have desirable properties, including ease of
             centring on an initial guess, large support, posterior
             consistency and straightforward computation via Gibbs
             sampling. Some advantages over discrete mixtures, such as
             Dirichlet process mixtures of Gaussian kernels, are
             discussed and illustrated via simulations and an
             application. © 2014 Biometrika Trust.},
   Doi = {10.1093/biomet/asu019},
   Key = {fds257837}
}

@article{fds257838,
   Author = {Hannah, LA and Powell, WB and Dunson, DB},
   Title = {Semiconvex regression for metamodeling-based
             optimization},
   Journal = {SIAM Journal on Optimization},
   Volume = {24},
   Number = {2},
   Pages = {573-597},
   Publisher = {Society for Industrial & Applied Mathematics
             (SIAM)},
   Year = {2014},
   Month = {January},
   ISSN = {1052-6234},
   url = {http://dx.doi.org/10.1137/130907070},
   Abstract = {Stochastic search involves finding a set of controllable
             parameters that minimizes an unknown objective function
             using a set of noisy observations. We consider the case when
             the unknown function is convex and a metamodel is used as a
             surrogate objective function. Often the data are non-i.i.d.
             and include an observable state variable, such as applicant
             information in a loan rate decision problem. State
             information is difficult to incorporate into convex models.
             We propose a new semiconvex regression method that is used
             to produce a convex metamodel in the presence of a state
             variable. We show consistency for this method. We
             demonstrate its effectiveness for metamodeling on a set of
             synthetic inventory management problems and a large
             real-life auto loan dataset. © 2014 Society for Industrial
             and Applied Mathematics.},
   Doi = {10.1137/130907070},
   Key = {fds257838}
}

@article{fds257840,
   Author = {Pati, D and Bhattacharya, A and Pillai, NS and Dunson,
             D},
   Title = {Posterior contraction in sparse bayesian factor models for
             massive covariance matrices},
   Journal = {The Annals of Statistics},
   Volume = {42},
   Number = {3},
   Pages = {1102-1130},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2014},
   Month = {January},
   ISSN = {0090-5364},
   url = {http://dx.doi.org/10.1214/14-AOS1215},
   Abstract = {Sparse Bayesian factor models are routinely implemented for
             parsimonious dependence modeling and dimensionality
             reduction in high-dimensional applications. We provide
             theoretical understanding of such Bayesian procedures in
             terms of posterior convergence rates in inferring
             high-dimensional covariance matrices where the dimension can
             be larger than the sample size. Under relevant sparsity
             assumptions on the true covariance matrix, we show that
             commonly-used point mass mixture priors on the factor
             loadings lead to consistent estimation in the operator norm
             even when p ≫ n. One of our major contributions is to develop a
             new class of continuous shrinkage priors and provide
             insights into their concentration around sparse vectors.
             Using such priors for the factor loadings, we obtain similar
             rate of convergence as obtained with point mass mixture
             priors. To obtain the convergence rates, we construct test
             functions to separate points in the space of
             high-dimensional covariance matrices using insights from
             random matrix theory; the tools developed may be of
             independent interest. We also derive minimax rates and show
             that the Bayesian posterior rates of convergence coincide
             with the minimax rates up to a √(log n) term.},
   Doi = {10.1214/14-AOS1215},
   Key = {fds257840}
}

@article{fds257841,
   Author = {Durante, D and Dunson, DB},
   Title = {Bayesian dynamic financial networks with time-varying
             predictors},
   Journal = {Statistics & Probability Letters},
   Volume = {93},
   Pages = {19-26},
   Publisher = {Elsevier BV},
   Year = {2014},
   Month = {January},
   ISSN = {0167-7152},
   url = {http://dx.doi.org/10.1016/j.spl.2014.06.015},
   Abstract = {We propose a targeted and robust modeling of dependence in
             multivariate time series via dynamic networks, with
             time-varying predictors included to improve interpretation
             and prediction. The model is applied to financial markets,
             estimating effects of verbal and material cooperation. ©
             2014 Elsevier B.V.},
   Doi = {10.1016/j.spl.2014.06.015},
   Key = {fds257841}
}

@article{fds257842,
   Author = {Durante, D and Scarpa, B and Dunson, DB},
   Title = {Locally adaptive factor processes for multivariate time
             series},
   Journal = {Journal of Machine Learning Research},
   Volume = {15},
   Pages = {1493-1522},
   Year = {2014},
   Month = {January},
   ISSN = {1532-4435},
   Abstract = {In modeling multivariate time series, it is important to
             allow time-varying smoothness in the mean and covariance
             process. In particular, there may be certain time intervals
             exhibiting rapid changes and others in which changes are
             slow. If such time-varying smoothness is not accounted for,
             one can obtain misleading inferences and predictions, with
             over-smoothing across erratic time intervals and
             under-smoothing across times exhibiting slow variation. This
             can lead to mis-calibration of predictive intervals, which
             can be substantially too narrow or wide depending on the
             time. We propose a locally adaptive factor process for
             characterizing multivariate mean-covariance changes in
             continuous time, allowing locally varying smoothness in both
             the mean and covariance matrix. This process is constructed
             utilizing latent dictionary functions evolving in time
             through nested Gaussian processes and linearly related to
             the observed data with a sparse mapping. Using a differential
             equation representation, we bypass usual computational
             bottlenecks in obtaining MCMC and online algorithms for
             approximate Bayesian inference. The performance is assessed
             in simulations and illustrated in a financial application.
             © 2014 Daniele Durante, Bruno Scarpa and David B.
             Dunson.},
   Key = {fds257842}
}

@article{fds257843,
   Author = {Lin, L and Dunson, DB},
   Title = {Bayesian monotone regression using Gaussian process
             projection},
   Journal = {Biometrika},
   Volume = {101},
   Number = {2},
   Pages = {303-317},
   Publisher = {Oxford University Press (OUP)},
   Year = {2014},
   Month = {January},
   ISSN = {0006-3444},
   url = {http://dx.doi.org/10.1093/biomet/ast063},
   Abstract = {Shape-constrained regression analysis has applications in
             dose-response modelling, environmental risk assessment,
             disease screening and many other areas. Incorporating the
             shape constraints can improve estimation efficiency and
             avoid implausible results. We propose a novel method,
             focusing on monotone curve and surface estimation, which
             uses Gaussian process projections. Our inference is based on
             projecting posterior samples from the Gaussian process. We
             develop theory on continuity of the projection and rates of
             contraction. Our approach leads to simple computation with
             good performance in finite samples. The proposed projection
             method can also be applied to other constrained-function
             estimation problems, including those in multivariate
             settings. © 2014 Biometrika Trust.},
   Doi = {10.1093/biomet/ast063},
   Key = {fds257843}
}
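
A minimal sketch of the projection idea, with draws from a Gaussian process prior standing in for posterior samples of the regression function: each sampled curve is projected onto the set of monotone functions by isotonic regression, which is the corresponding L2 projection. The squared-exponential kernel, its length-scale, and the grid are illustrative assumptions.

import numpy as np
from sklearn.isotonic import IsotonicRegression

rng = np.random.default_rng(1)
x = np.linspace(0, 1, 100)

# Squared-exponential covariance with a jitter term for stability
K = np.exp(-0.5 * (x[:, None] - x[None, :])**2 / 0.1**2) + 1e-8 * np.eye(len(x))

# Draws standing in for posterior samples of the regression function
draws = rng.multivariate_normal(np.zeros(len(x)), K, size=20)

# Project each sampled curve onto the monotone (nondecreasing) cone
iso = IsotonicRegression(increasing=True)
monotone_draws = np.array([iso.fit_transform(x, f) for f in draws])

assert np.all(np.diff(monotone_draws, axis=1) >= 0)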

@article{fds257844,
   Author = {Xing, Z and Nicholson, B and Jimenez, M and Veldman, T and Hudson, L and Lucas, J and Dunson, D and Zaas, AK and Woods, CW and Ginsburg, GS and Carin, L},
   Title = {Bayesian modeling of temporal properties of infectious
             disease in a college student population},
   Journal = {Journal of Applied Statistics},
   Volume = {41},
   Number = {6},
   Pages = {1358-1382},
   Year = {2014},
   Month = {January},
   ISSN = {0266-4763},
   url = {http://dx.doi.org/10.1080/02664763.2013.870138},
   Abstract = {A Bayesian statistical model is developed for analysis of
             the time-evolving properties of infectious disease, with a
             particular focus on viruses. The model employs a latent
             semi-Markovian state process, and the state-transition
             statistics are driven by three terms: (i) a general
             time-evolving trend of the overall population, (ii) a
             semi-periodic term that accounts for effects caused by the
             days of the week, and (iii) a regression term that relates
             the probability of infection to covariates (here,
             specifically, to the Google Flu Trends data). Computations
             are performed using Markov Chain Monte Carlo sampling.
             Results are presented using a novel data set: daily
             self-reported symptom scores from hundreds of Duke
             University undergraduate students, collected over three
             academic years. The illnesses associated with these students
             are (imperfectly) labeled using real-time (RT) polymerase
             chain reaction (PCR) testing for several viruses, and
             gene-expression data were also analyzed. The statistical
             analysis is performed on the daily, self-reported symptom
             scores, and the RT PCR and gene-expression data are employed
             for analysis and interpretation of the model results. ©
             2013 The Author(s). Published by Taylor &
             Francis.},
   Doi = {10.1080/02664763.2013.870138},
   Key = {fds257844}
}

@article{fds257845,
   Author = {Wade, S and Dunson, DB and Petrone, S and Trippa,
             L},
   Title = {Improving prediction from dirichlet process mixtures via
             enrichment},
   Journal = {Journal of Machine Learning Research},
   Volume = {15},
   Pages = {1041-1071},
   Year = {2014},
   Month = {January},
   ISSN = {1532-4435},
   Abstract = {Flexible covariate-dependent density estimation can be
             achieved by modelling the joint density of the response and
             covariates as a Dirichlet process mixture. An appealing
             aspect of this approach is that computations are relatively
             easy. In this paper, we examine the predictive performance
             of these models with an increasing number of covariates.
             Even for a moderate number of covariates, we find that the
             likelihood for x tends to dominate the posterior of the
             latent random partition, degrading the predictive
             performance of the model. To overcome this, we suggest using
             a different nonparametric prior, namely an enriched
             Dirichlet process. Our proposal maintains a simple
             allocation rule, so that computations remain relatively
             simple. Advantages are shown through both predictive
             equations and examples, including an application to
             diagnosing Alzheimer's disease. © 2014 Sara Wade, David B.
             Dunson, Sonia Petrone and Lorenzo Trippa.},
   Key = {fds257845}
}

@article{fds257858,
   Author = {Chen, CWS and Dunson, D and Frühwirth-Schnatter, S and Walker,
             SG},
   Title = {Special issue on Bayesian computing, methods and
             applications},
   Journal = {Computational Statistics & Data Analysis},
   Volume = {71},
   Pages = {273},
   Publisher = {Elsevier BV},
   Year = {2014},
   Month = {January},
   ISSN = {0167-9473},
   url = {http://dx.doi.org/10.1016/j.csda.2013.10.011},
   Doi = {10.1016/j.csda.2013.10.011},
   Key = {fds257858}
}

@article{fds322557,
   Author = {Bhattacharya, A and Pati, D and Dunson, D},
   Title = {Anisotropic function estimation using multi-bandwidth
             Gaussian processes},
   Journal = {The Annals of Statistics},
   Volume = {42},
   Number = {1},
   Pages = {352-381},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2014},
   Month = {January},
   url = {http://dx.doi.org/10.1214/13-AOS1192},
   Abstract = {© Institute of Mathematical Statistics, 2014. In
             nonparametric regression problems involving multiple
             predictors, there is typically interest in estimating an
             anisotropic multivariate regression surface in the important
             predictors while discarding the unimportant ones. Our focus
             is on defining a Bayesian procedure that leads to the
             minimax optimal rate of posterior contraction (up to a log
             factor) adapting to the unknown dimension and anisotropic
             smoothness of the true surface. We propose such an approach
             based on a Gaussian process prior with dimension-specific
             scalings, which are assigned carefully-chosen hyperpriors.
             We additionally show that using a homogeneous Gaussian
             process with a single bandwidth leads to a sub-optimal rate
             in anisotropic cases.},
   Doi = {10.1214/13-AOS1192},
   Key = {fds322557}
}

@article{fds322558,
   Author = {Durante, D and Dunson, DB},
   Title = {Bayesian logistic Gaussian process models for dynamic
             networks},
   Journal = {Journal of Machine Learning Research},
   Volume = {33},
   Pages = {194-201},
   Year = {2014},
   Month = {January},
   Abstract = {Time-varying adjacency matrices encoding the presence or
             absence of a relation among entities are available in many
             research fields. Motivated by an application to studying
             dynamic networks among sports teams, we propose a Bayesian
             nonparametric model. The proposed approach uses a logistic
             mapping from the probability matrix, encoding link
             probabilities between each team, to an embedded latent
             relational space. Within this latent space, we incorporate a
             dictionary of Gaussian process (GP) latent trajectories
             characterizing changes over time in each team, while
             allowing learning of the number of latent dimensions through
             a specially tailored prior for the GP covariance. The model
             is provably flexible and borrows strength across the network
             and over time. We provide simulation experiments and an
             application to the Italian soccer Championship.},
   Key = {fds322558}
}

@article{fds322559,
   Author = {Scarpa, B and Dunson, DB},
   Title = {Enriched Stick Breaking Processes for Functional
             Data.},
   Journal = {Journal of the American Statistical Association},
   Volume = {109},
   Number = {506},
   Pages = {647-660},
   Year = {2014},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2013.866564},
   Abstract = {In many applications involving functional data, prior
             information is available about the proportion of curves
             having different attributes. It is not straightforward to
             include such information in existing procedures for
             functional data analysis. Generalizing the functional
             Dirichlet process (FDP), we propose a class of
             stick-breaking priors for distributions of functions. These
             priors incorporate functional atoms drawn from constrained
             stochastic processes. The stick-breaking weights are
             specified to allow user-specified prior probabilities for
             curve attributes, with hyperpriors accommodating
             uncertainty. Compared with the FDP, the random distribution
             is enriched for curves having attributes known to be common.
             Theoretical properties are considered, methods are developed
             for posterior computation, and the approach is illustrated
             using data on temperature curves in menstrual
             cycles.},
   Doi = {10.1080/01621459.2013.866564},
   Key = {fds322559}
}
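
The priors above generalize stick-breaking constructions. As a reference point, here is a minimal sketch of ordinary Dirichlet process stick-breaking weights, w_k = v_k ∏_{l<k}(1 − v_l) with v_k ~ Beta(1, α); the concentration parameter and truncation level are arbitrary, and the enriched versions of the paper instead constrain such weights to match user-specified prior probabilities for curve attributes.

import numpy as np

rng = np.random.default_rng(2)
alpha, K = 1.0, 50   # DP concentration parameter and truncation level

v = rng.beta(1.0, alpha, size=K)                            # stick proportions
w = v * np.concatenate(([1.0], np.cumprod(1.0 - v)[:-1]))   # stick-breaking weights
print(w[:5], w.sum())   # the truncated weights sum to just under 1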

@article{fds323267,
   Author = {Carlson, DE and Vogelstein, JT and Wu, Q and Lian, W and Zhou, M and Stoetzner, CR and Kipke, D and Weber, D and Dunson, DB and Carin,
             L},
   Title = {Multichannel electrophysiological spike sorting via joint
             dictionary learning and mixture modeling},
   Journal = {Ieee Transactions on Bio Medical Engineering},
   Volume = {61},
   Number = {1},
   Pages = {41-54},
   Year = {2014},
   Month = {January},
   url = {http://dx.doi.org/10.1109/TBME.2013.2275751},
   Abstract = {We propose a methodology for joint feature learning and
             clustering of multichannel extracellular
             electrophysiological data, across multiple recording periods
             for action potential detection and classification (sorting).
             Our methodology improves over the previous state of the art
             principally in four ways. First, via sharing information
             across channels, we can better distinguish between
             single-unit spikes and artifacts. Second, our proposed
             "focused mixture model" (FMM) deals with units appearing,
             disappearing, or reappearing over multiple recording days,
             an important consideration for any chronic experiment.
             Third, by jointly learning features and clusters, we improve
             performance over previous attempts that proceeded via a
             two-stage learning process. Fourth, by directly modeling
             spike rate, we improve the detection of sparsely firing
             neurons. Moreover, our Bayesian methodology seamlessly
             handles missing data. We present state-of-the-art
             performance without requiring manual tuning of
             hyperparameters, considering both a public dataset with
             partial ground truth and a new experimental dataset. © 2013
             IEEE.},
   Doi = {10.1109/TBME.2013.2275751},
   Key = {fds323267}
}

@article{fds257823,
   Author = {Yin, R and Dunson, D and Cornelis, B and Brown, B and Ocon, N and Daubechies, I},
   Title = {Digital cradle removal in X-ray images of art
             paintings},
   Journal = {2014 Ieee International Conference on Image Processing, Icip
             2014},
   Pages = {4299-4303},
   Publisher = {IEEE},
   Year = {2014},
   Month = {January},
   ISBN = {9781479957514},
   url = {http://dx.doi.org/10.1109/ICIP.2014.7025873},
   Abstract = {© 2014 IEEE. We introduce an algorithm that removes the
             deleterious effect of cradling on X-ray images of paintings
             on wooden panels. The algorithm consists of a three stage
             procedure. Firstly, the cradled regions are located
             automatically. The second step consists of separating the
             X-ray image into a textural and image component. In the last
             step the algorithm learns to distinguish between the texture
             caused by the wooden cradle and the texture belonging to the
             original painted wooden panel. The results obtained with our
             method are compared with those obtained manually by best
             current practice.},
   Doi = {10.1109/ICIP.2014.7025873},
   Key = {fds257823}
}

@article{fds257850,
   Author = {Cui, K and Dunson, DB},
   Title = {Generalized Dynamic Factor Models for Mixed-Measurement Time
             Series.},
   Journal = {Journal of Computational and Graphical Statistics : a Joint
             Publication of American Statistical Association, Institute
             of Mathematical Statistics, Interface Foundation of North
             America},
   Volume = {23},
   Number = {1},
   Pages = {169-191},
   Year = {2014},
   Month = {February},
   ISSN = {1061-8600},
   url = {http://dx.doi.org/10.1080/10618600.2012.729986},
   Abstract = {In this article, we propose generalized Bayesian dynamic
             factor models for jointly modeling mixed-measurement time
             series. The framework allows mixed-scale measurements
             associated with each time series, with different
             measurements having different distributions in the
             exponential family conditionally on time-varying latent
             factor(s). Efficient Bayesian computational algorithms are
             developed for posterior inference on both the latent factors
             and model parameters, based on a Metropolis-Hastings
             algorithm with adaptive proposals. The algorithm relies on a
             Greedy Density Kernel Approximation (GDKA) and parameter
             expansion with latent factor normalization. We tested the
             framework and algorithms in simulated studies and applied
             them to the analysis of intertwined credit and recovery risk
             for Moody's rated firms from 1982-2008, illustrating the
             importance of jointly modeling mixed-measurement time
             series. The article has supplemental materials available
             online.},
   Doi = {10.1080/10618600.2012.729986},
   Key = {fds257850}
}

@article{fds257874,
   Author = {Pati, D and Dunson, DB},
   Title = {Bayesian nonparametric regression with varying residual
             density.},
   Journal = {Annals of the Institute of Statistical Mathematics},
   Volume = {66},
   Number = {1},
   Pages = {1-31},
   Year = {2014},
   Month = {February},
   ISSN = {0020-3157},
   url = {http://dx.doi.org/10.1007/s10463-013-0415-z},
   Abstract = {We consider the problem of robust Bayesian inference on the
             mean regression function allowing the residual density to
             change flexibly with predictors. The proposed class of
             models is based on a Gaussian process prior for the mean
             regression function and mixtures of Gaussians for the
             collection of residual densities indexed by predictors.
             Initially considering the homoscedastic case, we propose
             priors for the residual density based on probit
             stick-breaking (PSB) scale mixtures and symmetrized PSB
             (sPSB) location-scale mixtures. Both priors restrict the
             residual density to be symmetric about zero, with the sPSB
             prior more flexible in allowing multimodal densities. We
             provide sufficient conditions to ensure strong posterior
             consistency in estimating the regression function under the
             sPSB prior, generalizing existing theory focused on
             parametric residual distributions. The PSB and sPSB priors
             are generalized to allow residual densities to change
             nonparametrically with predictors through incorporating
             Gaussian processes in the stick-breaking components. This
             leads to a robust Bayesian regression procedure that
             automatically down-weights outliers and influential
             observations in a locally-adaptive manner. Posterior
             computation relies on an efficient data augmentation exact
             block Gibbs sampler. The methods are illustrated using
             simulated and real data applications.},
   Doi = {10.1007/s10463-013-0415-z},
   Key = {fds257874}
}

@article{fds257846,
   Author = {Kundu, S and Dunson, DB},
   Title = {Bayes variable selection in semiparametric linear
             models.},
   Journal = {Journal of the American Statistical Association},
   Volume = {109},
   Number = {505},
   Pages = {437-447},
   Year = {2014},
   Month = {March},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2014.881153},
   Abstract = {There is a rich literature on Bayesian variable selection
             for parametric models. Our focus is on generalizing methods
             and asymptotic theory established for mixtures of g-priors
             to semiparametric linear regression models having unknown
             residual densities. Using a Dirichlet process location
             mixture for the residual density, we propose a
             semiparametric g-prior which incorporates an unknown matrix
             of cluster allocation indicators. For this class of priors,
             posterior computation can proceed via a straightforward
             stochastic search variable selection algorithm. In addition,
             Bayes factor and variable selection consistency is shown to
             result under a class of proper priors on g even when the
             number of candidate predictors p is allowed to increase much
             faster than sample size n, while making sparsity assumptions
             on the true model size.},
   Doi = {10.1080/01621459.2014.881153},
   Key = {fds257846}
}

@article{fds257839,
   Author = {Zhang, J and Jima, D and Moffitt, AB and Liu, Q and Czader, M and Hsi, ED and Fedoriw, Y and Dunphy, CH and Richards, KL and Gill, JI and Sun, Z and Love, C and Scotland, P and Lock, E and Levy, S and Hsu, DS and Dunson, D and Dave, SS},
   Title = {The genomic landscape of mantle cell lymphoma is related to
             the epigenetically determined chromatin state of normal B
             cells.},
   Journal = {Blood},
   Volume = {123},
   Number = {19},
   Pages = {2988-2996},
   Year = {2014},
   Month = {May},
   ISSN = {0006-4971},
   url = {http://dx.doi.org/10.1182/blood-2013-07-517177},
   Abstract = {In this study, we define the genetic landscape of mantle
             cell lymphoma (MCL) through exome sequencing of 56 cases of
             MCL. We identified recurrent mutations in ATM, CCND1, MLL2,
             and TP53. We further identified a number of novel genes
             recurrently mutated in patients with MCL including RB1,
             WHSC1, POT1, and SMARCA4. We noted that MCLs have a distinct
             mutational profile compared with lymphomas from other B-cell
             stages. The ENCODE project has defined the chromatin
             structure of many cell types. However, a similar
             characterization of primary human mature B cells has been
             lacking. We defined, for the first time, the chromatin
             structure of primary human naïve, germinal center, and
             memory B cells through chromatin immunoprecipitation and
             sequencing for H3K4me1, H3K4me3, H3Ac, H3K36me3, H3K27me3,
             and PolII. We found that somatic mutations that occur more
             frequently in either MCLs or Burkitt lymphomas were
             associated with open chromatin in their respective B cells
             of origin, naïve B cells, and germinal center B cells. Our
             work thus elucidates the landscape of gene-coding mutations
             in MCL and the critical interplay between epigenetic
             alterations associated with B-cell differentiation and the
             acquisition of somatic mutations in cancer.},
   Doi = {10.1182/blood-2013-07-517177},
   Key = {fds257839}
}

@article{fds257852,
   Author = {Kessler, DC and Taylor, JA and Dunson, DB},
   Title = {Learning phenotype densities conditional on many interacting
             predictors.},
   Journal = {Bioinformatics (Oxford, England)},
   Volume = {30},
   Number = {11},
   Pages = {1562-1568},
   Year = {2014},
   Month = {June},
   ISSN = {1367-4803},
   url = {http://dx.doi.org/10.1093/bioinformatics/btu040},
   Abstract = {Estimating a phenotype distribution conditional on a set of
             discrete-valued predictors is a commonly encountered task.
             For example, interest may be in how the density of a
             quantitative trait varies with single nucleotide
             polymorphisms and patient characteristics. The subset of
             important predictors is not usually known in advance. This
             becomes more challenging with a high-dimensional predictor
             set when there is the possibility of interaction. We
             demonstrate a novel non-parametric Bayes method based on a
             tensor factorization of predictor-dependent weights for
             Gaussian kernels. The method uses multistage predictor
             selection for dimension reduction, providing succinct models
             for the phenotype distribution. The resulting conditional
             density morphs flexibly with the selected predictors. In a
             simulation study and an application to molecular
             epidemiology data, we demonstrate advantages over commonly
             used methods.},
   Doi = {10.1093/bioinformatics/btu040},
   Key = {fds257852}
}

@article{fds257836,
   Author = {Wheeler, MW and Dunson, DB and Pandalai, SP and Baker, BA and Herring,
             AH},
   Title = {Mechanistic Hierarchical Gaussian Processes.},
   Journal = {Journal of the American Statistical Association},
   Volume = {109},
   Number = {507},
   Pages = {894-904},
   Year = {2014},
   Month = {July},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2014.899234},
   Abstract = {The statistics literature on functional data analysis
             focuses primarily on flexible black-box approaches, which
             are designed to allow individual curves to have essentially
             any shape while characterizing variability. Such methods
             typically cannot incorporate mechanistic information, which
             is commonly expressed in terms of differential equations.
             Motivated by studies of muscle activation, we propose a
             nonparametric Bayesian approach that takes into account
             mechanistic understanding of muscle physiology. A novel
             class of hierarchical Gaussian processes is defined that
             favors curves consistent with differential equations defined
             on motor, damper, spring systems. A Gibbs sampler is
             proposed to sample from the posterior distribution and
             applied to a study of rats exposed to non-injurious muscle
             activation protocols. Although motivated by muscle force
             data, a parallel approach can be used to include mechanistic
             information in broad functional data analysis
             applications.},
   Doi = {10.1080/01621459.2014.899234},
   Key = {fds257836}
}
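
The mechanistic information referenced above enters through differential equations of motor, damper, spring type. A hedged sketch of one such second-order system, m x'' + c x' + k x = u(t), solved with scipy; the mass, damping, stiffness, and forcing below are illustrative assumptions rather than values from the paper, but trajectories of this kind are what the hierarchical prior is designed to favor.

import numpy as np
from scipy.integrate import solve_ivp

m, c, k = 1.0, 0.5, 4.0           # mass, damping, stiffness (illustrative)
u = lambda t: np.sin(2.0 * t)     # external "motor" forcing term

def rhs(t, y):
    # State y = (position, velocity) for m x'' + c x' + k x = u(t)
    pos, vel = y
    return [vel, (u(t) - c * vel - k * pos) / m]

sol = solve_ivp(rhs, (0.0, 10.0), [0.0, 0.0], t_eval=np.linspace(0, 10, 200))
print(sol.y[0][:5])               # position trajectory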

@article{fds257835,
   Author = {Dunson, DB},
   Title = {Comment},
   Journal = {Journal of the American Statistical Association},
   Volume = {109},
   Number = {507},
   Pages = {890-891},
   Publisher = {Informa UK Limited},
   Year = {2014},
   Month = {July},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2014.955988},
   Doi = {10.1080/01621459.2014.955988},
   Key = {fds257835}
}

@article{fds257833,
   Author = {Gu, K and Pati, D and Dunson, DB},
   Title = {Bayesian Multiscale Modeling of Closed Curves in Point
             Clouds.},
   Journal = {Journal of the American Statistical Association},
   Volume = {109},
   Number = {508},
   Pages = {1481-1494},
   Year = {2014},
   Month = {October},
   ISSN = {0162-1459},
   url = {http://dx.doi.org/10.1080/01621459.2014.934825},
   Abstract = {Modeling object boundaries based on image or point cloud
             data is frequently necessary in medical and scientific
             applications ranging from detecting tumor contours for
             targeted radiation therapy, to the classification of
             organisms based on their structural information. In
             low-contrast images or sparse and noisy point clouds, there
             is often insufficient data to recover local segments of the
             boundary in isolation. Thus, it becomes critical to model
             the entire boundary in the form of a closed curve. To
             achieve this, we develop a Bayesian hierarchical model that
             expresses highly diverse 2D objects in the form of closed
             curves. The model is based on a novel multiscale deformation
             process. By relating multiple objects through a hierarchical
             formulation, we can successfully recover missing boundaries
             by borrowing structural information from similar objects at
             the appropriate scale. Furthermore, the model's latent
             parameters help interpret the population, indicating
             dimensions of significant structural variability and also
             specifying a 'central curve' that summarizes the collection.
             Theoretical properties of our prior are studied in specific
             cases and efficient Markov chain Monte Carlo methods are
             developed, evaluated through simulation examples and applied
             to panorex teeth images for modeling teeth contours and also
             to a brain tumor contour detection problem.},
   Doi = {10.1080/01621459.2014.934825},
   Key = {fds257833}
}

@article{fds257865,
   Author = {Yang, H and Liu, F and Ji, C and Dunson, D},
   Title = {Adaptive sampling for Bayesian geospatial
             models},
   Journal = {Statistics and Computing},
   Volume = {24},
   Number = {6},
   Pages = {1101-1110},
   Publisher = {Springer Nature},
   Year = {2014},
   Month = {November},
   ISSN = {0960-3174},
   url = {http://dx.doi.org/10.1007/s11222-013-9422-4},
   Abstract = {© 2013, Springer Science+Business Media New York. Bayesian
             hierarchical modeling with Gaussian process random effects
             provides a popular approach for analyzing point-referenced
             spatial data. For large spatial data sets, however, generic
             posterior sampling is infeasible due to the extremely high
             computational burden in decomposing the spatial correlation
             matrix. In this paper, we propose an efficient
             algorithm—the adaptive griddy Gibbs (AGG) algorithm—to
             address the computational issues with large spatial data
             sets. The proposed algorithm dramatically reduces the
             computational complexity. We show theoretically that the
             proposed method can approximate the real posterior
             distribution accurately. The sufficient number of grid
             points for a required accuracy has also been derived. We
             compare the performance of AGG with that of the
             state-of-the-art methods in simulation studies. Finally, we
             apply AGG to spatially indexed data concerning building
             energy consumption.},
   Doi = {10.1007/s11222-013-9422-4},
   Key = {fds257865}
}
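
AGG adapts the classical griddy Gibbs update. A minimal sketch of one generic griddy Gibbs draw (not the paper's adaptive grid selection or accuracy bounds): evaluate the unnormalized log full conditional on a grid, normalize, and sample by inverse-CDF interpolation. The bimodal target below is an arbitrary illustration.

import numpy as np

rng = np.random.default_rng(3)

def griddy_gibbs_draw(log_cond, grid, rng):
    """One griddy Gibbs update: inverse-CDF sampling of a full
    conditional evaluated on a fixed grid, with linear interpolation."""
    logp = log_cond(grid)
    p = np.exp(logp - logp.max())   # stabilize before normalizing
    cdf = np.cumsum(p)
    cdf /= cdf[-1]
    return np.interp(rng.random(), cdf, grid)

# Illustrative unnormalized log conditional: a bimodal density on [-4, 4]
log_cond = lambda x: np.logaddexp(-0.5 * (x - 1.5)**2, -0.5 * (x + 1.5)**2)
grid = np.linspace(-4, 4, 201)
draws = np.array([griddy_gibbs_draw(log_cond, grid, rng) for _ in range(1000)])
print(draws.mean(), draws.std())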

@article{fds322556,
   Author = {Durante, D and Dunson, DB},
   Title = {Nonparametric Bayes dynamic modelling of relational
             data},
   Journal = {Biometrika},
   Volume = {101},
   Number = {4},
   Pages = {883-898},
   Publisher = {Oxford University Press (OUP)},
   Year = {2014},
   Month = {December},
   url = {http://dx.doi.org/10.1093/biomet/asu040},
   Abstract = {© 2014 Biometrika Trust. Symmetric binary matrices
             representing relations are collected in many areas. Our
             focus is on dynamically evolving binary relational matrices,
             with interest being on inference on the relationship
             structure and prediction. We propose a nonparametric
             Bayesian dynamic model, which reduces dimensionality in
             characterizing the binary matrix through a lower-dimensional
             latent space representation, with the latent coordinates
             evolving in continuous time via Gaussian processes. By using
             a logistic mapping function from the link probability matrix
             space to the latent relational space, we obtain a flexible
             and computationally tractable formulation. Employing
             Pólya-gamma data augmentation, an efficient Gibbs sampler
             is developed for posterior computation, with the dimension
             of the latent space automatically inferred. We provide
             theoretical results on flexibility of the model, and
             illustrate its performance via simulation experiments. We
             also consider an application to co-movements in world
             financial markets.},
   Doi = {10.1093/biomet/asu040},
   Key = {fds322556}
}
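
A generative sketch of the latent space construction described above: each node's latent coordinates follow smooth trajectories over time (drawn here from a Gaussian process with a squared-exponential kernel, an illustrative stand-in), and the edge probabilities at each time are the logistic map of latent inner products. Sizes and kernel settings are arbitrary.

import numpy as np

rng = np.random.default_rng(4)
V, H, T = 10, 2, 30   # nodes, latent dimensions, time points
t = np.linspace(0, 1, T)

# GP covariance over time for each latent coordinate
K = np.exp(-0.5 * (t[:, None] - t[None, :])**2 / 0.2**2) + 1e-8 * np.eye(T)
X = rng.multivariate_normal(np.zeros(T), K, size=(V, H))   # shape (V, H, T)

sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
A = np.zeros((T, V, V), dtype=int)
for s in range(T):
    P = sigmoid(X[:, :, s] @ X[:, :, s].T)       # link probabilities at time s
    upper = np.triu(rng.random((V, V)) < P, 1)   # sample upper triangle, no loops
    A[s] = upper + upper.T                       # symmetric binary adjacency
print(A.shape, A[0].sum())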

@article{fds257827,
   Author = {Chabout, J and Sarkar, A and Dunson, DB and Jarvis,
             ED},
   Title = {Male mice song syntax depends on social contexts and
             influences female preferences.},
   Journal = {Frontiers in Behavioral Neuroscience},
   Volume = {9},
   Pages = {76},
   Publisher = {FRONTIERS MEDIA SA},
   Year = {2015},
   url = {http://hdl.handle.net/10161/9544},
   Abstract = {In 2005, Holy and Guo advanced the idea that male mice
             produce ultrasonic vocalizations (USV) with some features
             similar to courtship songs of songbirds. Since then, studies
             showed that male mice emit USV songs in different contexts
             (sexual and other) and possess a multisyllabic repertoire.
             Debate still exists for and against plasticity in their
             vocalizations. But the use of a multisyllabic repertoire can
             increase potential flexibility and information, in how
             elements are organized and recombined, namely syntax. In
             many bird species, modulating song syntax has ethological
             relevance for sexual behavior and mate preferences. In this
             study we exposed adult male mice to different social
             contexts and developed a new approach of analyzing their
             USVs based on songbird syntax analysis. We found that male
             mice modify their syntax, including specific sequences,
             length of sequence, repertoire composition, and spectral
             features, according to stimulus and social context. Males
             emit longer and simpler syllables and sequences when singing
             to females, but more complex syllables and sequences in
             response to fresh female urine. Playback experiments show
             that the females prefer the complex songs over the simpler
             ones. We propose that the complex songs serve to lure
             females in, whereas the simpler directed sequences are used
             for direct courtship. These results suggest that, although
             mice have a much more limited capacity for song
             modification, they could still be used as animal models for
             understanding some of the vocal communication features for
             which songbirds are studied.},
   Doi = {10.3389/fnbeh.2015.00076},
   Key = {fds257827}
}

@article{fds331654,
   Author = {Johndrow, JE and Mattingly, JC and Mukherjee, S and Dunson,
             D},
   Title = {Optimal approximating Markov chains for Bayesian
             inference},
   Year = {2015},
   Abstract = {The Markov Chain Monte Carlo method is the dominant paradigm
             for posterior computation in Bayesian analysis. It is common
             to control computation time by making approximations to the
             Markov transition kernel. Comparatively little attention has
             been paid to computational optimality in these approximating
             Markov Chains, or when such approximations are justified
             relative to obtaining shorter paths from the exact kernel.
             We give simple, sharp bounds for uniform approximations of
             uniformly mixing Markov chains. We then suggest a notion of
             optimality that incorporates computation time and
             approximation error, and use our bounds to make
             generalizations about properties of good approximations in
             the uniformly mixing setting. The relevance of these
             properties is demonstrated in applications to a
             minibatching-based approximate MCMC algorithm for large $n$
             logistic regression and low-rank approximations for Gaussian
             processes.},
   Key = {fds331654}
}

@article{fds257828,
   Author = {Canale, A and Dunson, DB},
   Title = {Bayesian multivariate mixed-scale density
             estimation},
   Journal = {Statistics and Its Interface},
   Volume = {8},
   Number = {2},
   Pages = {195-201},
   Publisher = {International Press of Boston},
   Year = {2015},
   Month = {January},
   ISSN = {1938-7989},
   url = {http://dx.doi.org/10.4310/SII.2015.v8.n2.a7},
   Abstract = {Although continuous density estimation has received abundant
             attention in the Bayesian nonparametrics literature, there
             is limited theory on multivariate mixed scale density
             estimation. In this note, we consider a general framework to
             jointly model continuous, count and categorical variables
             under a nonparametric prior, which is induced through
             rounding latent variables having an unknown density with
             respect to Lebesgue measure. For the proposed class of
             priors, we provide sufficient conditions for large support,
             strong consistency and rates of posterior contraction. These
             conditions allow one to convert sufficient conditions
             obtained in the setting of multivariate continuous density
             estimation to the mixed scale case. To illustrate the
             procedure, a rounded multivariate nonparametric mixture of
             Gaussians is introduced and applied to a crime and
             communities dataset.},
   Doi = {10.4310/SII.2015.v8.n2.a7},
   Key = {fds257828}
}
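
A generative sketch of the rounding construction used above: a latent vector is drawn from a Gaussian mixture having a density with respect to Lebesgue measure, and deterministic maps then produce mixed-scale observations (a continuous coordinate kept as-is, a nonnegative count via flooring, a binary indicator via thresholding). All mixture parameters below are illustrative.

import numpy as np

rng = np.random.default_rng(5)
n = 1000

# Latent two-component Gaussian mixture in R^3 (illustrative parameters)
comp = rng.random(n) < 0.4
means = np.where(comp[:, None], [2.0, 1.0, 0.0], [-1.0, 3.0, 1.0])
Z = means + rng.standard_normal((n, 3))

# Rounding/thresholding maps induce the mixed-scale observed variables
y_cont = Z[:, 0]                                         # continuous
y_count = np.maximum(np.floor(Z[:, 1]), 0).astype(int)   # count
y_binary = (Z[:, 2] > 0).astype(int)                     # categorical
print(y_cont[:3], y_count[:3], y_binary[:3])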

@article{fds257832,
   Author = {Kessler, DC and Hoff, PD and Dunson, DB},
   Title = {Marginally specified priors for non-parametric Bayesian
             estimation},
   Journal = {Journal of the Royal Statistical Society: Series B
             (Statistical Methodology)},
   Volume = {77},
   Number = {1},
   Pages = {35-58},
   Year = {2015},
   Month = {January},
   ISSN = {1369-7412},
   url = {http://dx.doi.org/10.1111/rssb.12059},
   Abstract = {© 2014 Royal Statistical Society. Prior specification for
             non-parametric Bayesian inference involves the difficult
             task of quantifying prior knowledge about a parameter of
             high, often infinite, dimension. A statistician is unlikely
             to have informed opinions about all aspects of such a
             parameter but will have real information about functionals
             of the parameter, such as the population mean or variance.
             The paper proposes a new framework for non-parametric Bayes
             inference in which the prior distribution for a possibly
             infinite dimensional parameter is decomposed into two parts:
             an informative prior on a finite set of functionals, and a
             non-parametric conditional prior for the parameter given the
             functionals. Such priors can be easily constructed from
             standard non-parametric prior distributions in common use
             and inherit the large support of the standard priors on
             which they are based. Additionally, posterior approximations
             under these informative priors can generally be made via
             minor adjustments to existing Markov chain approximation
             algorithms for standard non-parametric prior distributions.
             We illustrate the use of such priors in the context of
             multivariate density estimation using Dirichlet process
             mixture models, and in the modelling of high dimensional
             sparse contingency tables.},
   Doi = {10.1111/rssb.12059},
   Key = {fds257832}
}

@article{fds322544,
   Author = {Van Den Boom and W and Dunson, D and Reeves, G},
   Title = {Quantifying uncertainty in variable selection with arbitrary
             matrices},
   Journal = {2015 Ieee 6th International Workshop on Computational
             Advances in Multi Sensor Adaptive Processing, Camsap
             2015},
   Pages = {385-388},
   Year = {2015},
   Month = {January},
   ISBN = {9781479919635},
   url = {http://dx.doi.org/10.1109/CAMSAP.2015.7383817},
   Abstract = {© 2015 IEEE. Probabilistically quantifying uncertainty in
             parameters, predictions and decisions is a crucial component
             of broad scientific and engineering applications. This is
             however difficult if the number of parameters far exceeds
             the sample size. Although there are currently many methods
             which have guarantees for problems characterized by large
             random matrices, there is often a gap between theory and
             practice when it comes to measures of statistical
             significance for matrices encountered in real-world
             applications. This paper proposes a scalable framework that
             utilizes state-of-the-art methods to provide approximations
             to the marginal posterior distributions. This framework is
             used to approximate marginal posterior inclusion
             probabilities for Bayesian variable selection.},
   Doi = {10.1109/CAMSAP.2015.7383817},
   Key = {fds322544}
}

@article{fds322551,
   Author = {Zhou, J and Bhattacharya, A and Herring, A and Dunson,
             D},
   Title = {Bayesian factorizations of big sparse tensors.},
   Journal = {Journal of the American Statistical Association},
   Volume = {110},
   Number = {512},
   Pages = {1562-1576},
   Publisher = {Informa UK Limited},
   Year = {2015},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2014.983233},
   Abstract = {It has become routine to collect data that are structured as
             multiway arrays (tensors). There is an enormous literature
             on low rank and sparse matrix factorizations, but limited
             consideration of extensions to the tensor case in
             statistics. The most common low rank tensor factorization
             relies on parallel factor analysis (PARAFAC), which
             expresses a rank k tensor as a sum of rank one tensors. When
             observations are only available for a tiny subset of the
             cells of a big tensor, the low rank assumption is not
             sufficient and PARAFAC has poor performance. We induce an
             additional layer of dimension reduction by allowing the
             effective rank to vary across dimensions of the table. For
             concreteness, we focus on a contingency table application.
             Taking a Bayesian approach, we place priors on terms in the
             factorization and develop an efficient Gibbs sampler for
             posterior computation. Theory is provided showing posterior
             concentration rates in high-dimensional settings, and the
             methods are shown to have excellent performance in
             simulations and several real data applications.},
   Doi = {10.1080/01621459.2014.983233},
   Key = {fds322551}
}
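
The PARAFAC decomposition mentioned above writes a rank-k tensor as a sum of k rank-one tensors, T[i,j,l] = Σ_r A[i,r] B[j,r] C[l,r]. A minimal sketch with arbitrary dimensions and rank; the paper's contribution is to let the effective rank vary across dimensions, which this plain version does not do.

import numpy as np

rng = np.random.default_rng(6)
d1, d2, d3, k = 4, 5, 6, 3   # table dimensions and PARAFAC rank

# One factor matrix per mode of the tensor
A, B, C = (rng.random((d, k)) for d in (d1, d2, d3))

# Sum of k outer products: T[i, j, l] = sum_r A[i, r] * B[j, r] * C[l, r]
T = np.einsum('ir,jr,lr->ijl', A, B, C)
print(T.shape)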

@article{fds322553,
   Author = {Srivastava, S and Cevher, V and Tran-Dinh, Q and Dunson,
             DB},
   Title = {WASP: Scalable Bayes via barycenters of subset
             posteriors},
   Journal = {Journal of Machine Learning Research},
   Volume = {38},
   Pages = {912-920},
   Year = {2015},
   Month = {January},
   Abstract = {Copyright 2015 by the authors. The promise of Bayesian
             methods for big data sets has not fully been realized due to
             the lack of scalable computational algorithms. For massive
             data, it is necessary to store and process subsets on
             different machines in a distributed manner. We propose a
             simple, general, and highly efficient approach, which first
             runs a posterior sampling algorithm in parallel on different
             machines for subsets of a large data set. To combine these
             subset posteriors, we calculate the Wasserstein barycenter
             via a highly efficient linear program. The resulting
             estimate for the Wasserstein posterior (WASP) has an atomic
             form, facilitating straightforward estimation of posterior
             summaries of functionals of interest. The WASP approach
             allows posterior sampling algorithms for smaller data sets
             to be trivially scaled to huge data. We provide theoretical
             justification in terms of posterior consistency and
             algorithm efficiency. Examples are provided in complex
             settings including Gaussian process regression and
             nonparametric Bayes mixture models.},
   Key = {fds322553}
}
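
In one dimension, the Wasserstein-2 barycenter used by WASP has a closed form: average the quantile functions of the subset posteriors. A sketch of this combining step for a scalar functional, with simulated Gaussian draws standing in for the subset posterior samples; the general case in the paper instead solves a linear program.

import numpy as np

rng = np.random.default_rng(7)
m, S = 2000, 8   # posterior draws per subset, number of subsets

# Stand-ins for subset posterior samples of a scalar functional
subset_draws = [rng.normal(loc=rng.normal(0, 0.3), scale=1.0, size=m)
                for _ in range(S)]

# 1-D W2 barycenter: average the empirical quantile functions
qs = np.linspace(0.005, 0.995, 199)
quantiles = np.array([np.quantile(d, qs) for d in subset_draws])
wasp_quantiles = quantiles.mean(axis=0)   # quantile function of the WASP
print(wasp_quantiles[[0, 99, 198]])       # tails and median of the barycenter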

@article{fds322554,
   Author = {Wang, X and Leng, C and Dunson, DB},
   Title = {On the consistency theory of high dimensional variable
             screening},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {2015-January},
   Pages = {2431-2439},
   Year = {2015},
   Month = {January},
   Abstract = {Variable screening is a fast dimension reduction technique
             for assisting high dimensional feature selection. As a
             preselection method, it selects a moderate size subset of
             candidate variables for further refining via feature
             selection to produce the final model. The performance of
             variable screening depends on both computational efficiency
             and the ability to dramatically reduce the number of
             variables without discarding the important ones. When the
             data dimension p is substantially larger than the sample
             size n, variable screening becomes crucial as 1) Faster
             feature selection algorithms are needed; 2) Conditions
             guaranteeing selection consistency might fail to hold. This
             article studies a class of linear screening methods and
             establishes consistency theory for this special class. In
             particular, we prove the restricted diagonally dominant
             (RDD) condition is a necessary and sufficient condition for
             strong screening consistency. As concrete examples, we show
             two screening methods SIS and HOLP are both strong screening
             consistent (subject to additional constraints) with large
             probability if n > O((ρs + σ/τ)² log p) under random designs.
             In addition, we relate the RDD condition to the
             irrepresentable condition, and highlight limitations of
             SIS.},
   Key = {fds322554}
}
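
Both screening rules studied above are short to state: SIS ranks predictors by the marginal correlations |Xᵀy|, while HOLP ranks them by the high-dimensional least-squares projection Xᵀ(XXᵀ)⁻¹y. A sketch with p much larger than n and a sparse truth; all sizes are arbitrary.

import numpy as np

rng = np.random.default_rng(8)
n, p = 100, 2000                      # p substantially larger than n
X = rng.standard_normal((n, p))
beta = np.zeros(p)
beta[:5] = 3.0                        # sparse truth: first five predictors active
y = X @ beta + rng.standard_normal(n)

# SIS: rank by absolute marginal correlation
sis_score = np.abs(X.T @ y)

# HOLP: rank by the high-dimensional projection X^T (X X^T)^{-1} y
holp_score = np.abs(X.T @ np.linalg.solve(X @ X.T, y))

keep = 20                             # moderate screened subset size
print(np.sort(np.argsort(-sis_score)[:keep])[:10])
print(np.sort(np.argsort(-holp_score)[:keep])[:10])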

@article{fds322029,
   Author = {Wang, X and Guo, F and Heller, KA and Dunson, DB},
   Title = {Parallelizing MCMC with random partition
             trees},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {2015-January},
   Pages = {451-459},
   Year = {2015},
   Month = {January},
   Abstract = {The modern scale of data has brought new challenges to
             Bayesian inference. In particular, conventional MCMC
             algorithms are computationally very expensive for large data
             sets. A promising approach to solve this problem is
             embarrassingly parallel MCMC (EP-MCMC), which first
             partitions the data into multiple subsets and runs
             independent sampling algorithms on each subset. The subset
             posterior draws are then aggregated via some combining rules
             to obtain the final approximation. Existing EP-MCMC
             algorithms are limited by approximation accuracy and
             difficulty in resampling. In this article, we propose a new
             EP-MCMC algorithm PART that solves these problems. The new
             algorithm applies random partition trees to combine the
             subset posterior draws, which is distribution-free, easy to
             resample from and can adapt to multiple scales. We provide
             theoretical justification and extensive experiments
             illustrating empirical performance.},
   Key = {fds322029}
}
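
A deliberately simplified one-dimensional picture of the combining step, with a fixed histogram grid standing in for PART's random partition trees: estimate each subset posterior density on shared bins, multiply across subsets, renormalize, and resample. This sketches the aggregation idea only, not the paper's algorithm, and the simulated subset draws are illustrative.

import numpy as np

rng = np.random.default_rng(9)
m, S = 5000, 4   # draws per subset, number of subsets

# Simulated subset posterior draws for a scalar parameter
subsets = [rng.normal(loc=0.1 * s, scale=1.0, size=m) for s in range(S)]

# Shared partition (a fixed grid here; PART learns random tree partitions)
edges = np.linspace(-5, 5, 101)
widths = np.diff(edges)

# Histogram density per subset, combined by multiplying across subsets
log_prod = np.zeros(len(widths))
for d in subsets:
    dens, _ = np.histogram(d, bins=edges, density=True)
    log_prod += np.log(dens + 1e-12)   # small floor avoids log(0) in empty bins

prob = np.exp(log_prod - log_prod.max()) * widths
prob /= prob.sum()

# Resample from the combined posterior approximation
centers = 0.5 * (edges[:-1] + edges[1:])
combined = rng.choice(centers, size=2000, p=prob)
print(combined.mean(), combined.std())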

@article{fds322555,
   Author = {Wang, Y and Dunson, D},
   Title = {Probabilistic curve learning: Coulomb repulsion and the
             electrostatic Gaussian process},
   Journal = {Advances in Neural Information Processing
             Systems},
   Volume = {2015-January},
   Pages = {1738-1746},
   Year = {2015},
   Month = {January},
   Abstract = {Learning of low dimensional structure in multidimensional
             data is a canonical problem in machine learning. One common
             approach is to suppose that the observed data are close to a
             lower-dimensional smooth manifold. There are a rich variety
             of manifold learning methods available, which allow mapping
             of data points to the manifold. However, there is a clear
             lack of probabilistic methods that allow learning of the
             manifold along with the generative distribution of the
             observed data. The best attempt is the Gaussian process
             latent variable model (GP-LVM), but identifiability issues
             lead to poor performance. We solve these issues by proposing
             a novel Coulomb repulsive process (Corp) for locations of
             points on the manifold, inspired by physical models of
             electrostatic interactions among particles. Combining this
             process with a GP prior for the mapping function yields a
             novel electrostatic GP (electroGP) process. Focusing on the
             simple case of a one-dimensional manifold, we develop
             efficient inference algorithms, and illustrate substantially
             improved performance in a variety of experiments including
             filling in missing frames in video.},
   Key = {fds322555}
}

@article{fds322543,
   Author = {Kunihama, T and Dunson, DB},
   Title = {Nonparametric Bayes inference on conditional
             independence},
   Journal = {Biometrika},
   Volume = {103},
   Number = {1},
   Pages = {35-47},
   Publisher = {Oxford University Press (OUP)},
   Year = {2015},
   Month = {January},
   url = {http://dx.doi.org/10.1093/biomet/asv060},
   Abstract = {© 2016 Biometrika Trust. In many application areas, a
             primary focus is on assessing evidence in the data refuting
             the assumption of independence of Y and X conditionally on
             Z, with Y response variables, X predictors of interest, and
             Z covariates. Ideally, one would have methods available that
             avoid parametric assumptions, allow Y, X, Z to be random
             variables on arbitrary spaces with arbitrary dimension, and
             accommodate rapid consideration of different candidate
             predictors. As a formal decision-theoretic approach has
             clear disadvantages in this context, we instead rely on an
             encompassing nonparametric Bayes model for the joint
             distribution of Y, X and Z, with conditional mutual
             information used as a summary of the strength of conditional
             dependence. We construct a functional of the encompassing
             model and empirical measure for estimation of conditional
             mutual information. The implementation relies on a single
             Markov chain Monte Carlo run under the encompassing model,
             with conditional mutual information for candidate models
             calculated as a byproduct. We provide an asymptotic theory
             supporting the approach, and apply the method to variable
             selection. The methods are illustrated through simulations
             and criminology applications.},
   Doi = {10.1093/biomet/asv060},
   Key = {fds322543}
}
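
The paper's estimator arises from an encompassing nonparametric Bayes
model; as a far simpler point of reference for the summary statistic
itself, the sketch below computes the empirical plug-in conditional
mutual information I(Y; X | Z) for discrete data (an illustrative
baseline, not the authors' method).

    # Plug-in conditional mutual information I(Y; X | Z) for discrete data.
    import numpy as np
    from collections import Counter

    def conditional_mutual_information(y, x, z):
        n = len(y)
        p_xyz = Counter(zip(x, y, z))
        p_xz = Counter(zip(x, z))
        p_yz = Counter(zip(y, z))
        p_z = Counter(z)
        cmi = 0.0
        for (xi, yi, zi), c in p_xyz.items():
            cmi += (c / n) * np.log(c * p_z[zi] / (p_xz[xi, zi] * p_yz[yi, zi]))
        return cmi

    rng = np.random.default_rng(1)
    z = rng.integers(0, 2, 5000)
    x = (z + rng.integers(0, 2, 5000)) % 2
    y = (z + rng.integers(0, 2, 5000)) % 2      # Y independent of X given Z
    print(conditional_mutual_information(y, x, z))  # close to zero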

@article{fds257829,
   Author = {Lock, EF and Soldano, KL and Garrett, ME and Cope, H and Markunas, CA and Fuchs, H and Grant, G and Dunson, DB and Gregory, SG and Ashley-Koch,
             AE},
   Title = {Joint eQTL assessment of whole blood and dura mater tissue
             from individuals with Chiari type I malformation.},
   Journal = {Bmc Genomics},
   Volume = {16},
   Pages = {11},
   Year = {2015},
   Month = {January},
   url = {http://dx.doi.org/10.1186/s12864-014-1211-8},
   Abstract = {BACKGROUND: Expression quantitative trait loci (eQTL) play
             an important role in the regulation of gene expression. Gene
             expression levels and eQTLs are expected to vary from tissue
             to tissue, and therefore multi-tissue analyses are necessary
             to fully understand complex genetic conditions in humans.
             Dura mater tissue likely interacts with cranial bone growth
             and thus may play a role in the etiology of Chiari Type I
             Malformation (CMI) and related conditions, but it is often
             inaccessible and its gene expression has not been well
             studied. A genetic basis to CMI has been established;
             however, the specific genetic risk factors are not well
             characterized. RESULTS: We present an assessment of eQTLs
             for whole blood and dura mater tissue from individuals with
             CMI. A joint-tissue analysis identified 239 eQTLs in either
             dura or blood, with 79% of these eQTLs shared by both
             tissues. Several identified eQTLs were novel and these
             implicate genes involved in bone development (IPO8, XYLT1,
             and PRKAR1A), and ribosomal pathways related to marrow and
             bone dysfunction, as potential candidates in the development
             of CMI. CONCLUSIONS: Despite strong overall heterogeneity in
             expression levels between blood and dura, the majority of
             cis-eQTLs are shared by both tissues. The power to detect
             shared eQTLs was improved by using an integrative
             statistical approach. The identified tissue-specific and
             shared eQTLs provide new insight into the genetic basis for
             CMI and related conditions.},
   Doi = {10.1186/s12864-014-1211-8},
   Key = {fds257829}
}

@article{fds257830,
   Author = {Li, D and Wilcox, AJ and Dunson, DB},
   Title = {Benchmark pregnancy rates and the assessment of post-coital
             contraceptives: an update.},
   Journal = {Contraception},
   Volume = {91},
   Number = {4},
   Pages = {344-349},
   Year = {2015},
   Month = {April},
   ISSN = {0010-7824},
   url = {http://dx.doi.org/10.1016/j.contraception.2015.01.002},
   Abstract = {In 2001, we provided benchmark estimates of probability of
             pregnancy given a single act of intercourse. Those
             calculations assumed that intercourse and ovulation are
             independent. Subsequent research has shown that this
             assumption is not valid. We provide here an update of
             previous benchmark estimates. We reanalyze earlier data from
             two North Carolina studies that collected daily urine
             samples and recorded daily intercourse for multiple
             menstrual cycles. One study comprised 68 sexually active
             women with either an intrauterine device or tubal ligation.
             The second was of 221 women who planned to become pregnant
             and had discontinued use of any birth control at enrollment.
             Participants had no known fertility problems. New
             statistical analyses were based on Monte Carlo simulations
             and Bayesian methods. The probability that a single act of
             intercourse occurs within a woman's fertile window is 25%,
             compared with 20% in previous calculations. The probability
             of pregnancy with intercourse on a given menstrual cycle day
             is correspondingly higher than previously estimated, with
             the largest increases occurring on menstrual days 12-22.
             These increases are, however, fairly small (for example, the
             peak chance of conception on menstrual day 13 increased from
             8.6% to 9.7%). Previous benchmark rates of pregnancy with one
             act of intercourse were moderately underestimated due to a
             mistaken assumption about the independence of intercourse
             and ovulation. The chance of pregnancy with a single act of
             unprotected intercourse is greater than previously
             estimated. Previous benchmarks may underestimate the
             efficacy of post-coital contraception.},
   Doi = {10.1016/j.contraception.2015.01.002},
   Key = {fds257830}
}

@article{fds257863,
   Author = {Hua, Z and Zhu, H and Dunson, DB},
   Title = {Semiparametric Bayes local additive models for longitudinal
             data.},
   Journal = {Statistics in Biosciences},
   Volume = {7},
   Number = {1},
   Pages = {90-107},
   Year = {2015},
   Month = {May},
   ISSN = {1867-1764},
   url = {http://dx.doi.org/10.1007/s12561-013-9104-y},
   Abstract = {In longitudinal data analysis, there is great interest in
             assessing the impact of predictors on the time-varying
             trajectory in a response variable. In such settings, an
             important issue is to account for heterogeneity in the shape
             of the trajectory among subjects, while allowing the impact
             of the predictors to vary across subjects. We propose a
             flexible semiparametric Bayes approach for addressing this
             issue relying on a local partition process prior, which
             allows flexible local borrowing of information across
             subjects. Local hypothesis testing and credible bands are
             developed for the identification of time windows across
             which a predictor has a significant impact, while adjusting
             for multiple comparisons. Posterior computation proceeds via
             an efficient MCMC algorithm using the exact block Gibbs
             sampler. The methods are assessed using simulation studies
             and applied to a yeast cell-cycle gene expression data
             set.},
   Doi = {10.1007/s12561-013-9104-y},
   Key = {fds257863}
}

@article{fds322552,
   Author = {Guo, F and Dunson, DB},
   Title = {Uncovering systematic bias in ratings across categories: A
             Bayesian approach},
   Journal = {Recsys 2015 Proceedings of the 9th Acm Conference on
             Recommender Systems},
   Pages = {317-320},
   Year = {2015},
   Month = {September},
   ISBN = {9781450336925},
   url = {http://dx.doi.org/10.1145/2792838.2799683},
   Abstract = {© 2015 ACM. Recommender systems are routinely equipped with
             standardized taxonomy that associates each item with one or
             more categories or genres. Although such information does
             not directly imply the quality of an item, the distribution
             of ratings varies greatly across categories, e.g. animation
             movies may generally receive higher ratings than action
             movies. While it is a natural outcome given the diversity
             and heterogeneity of both users and items, it makes directly
             aggregated ratings, which are commonly used to guide users'
             choice by reflecting the overall quality of an item,
             incomparable across categories and hence prone to fairness
             and diversity issues. This paper aims to uncover and
             calibrate systematic category-wise biases for
             discrete-valued ratings. We propose a novel Bayesian
             multiplicative probit model that treats the inflation or
             deflation of mean rating for a combination of categories as
             multiplicatively contributed from category-specific
             parameters. The posterior distribution of those parameters,
             as inferred from data, can capture the bias for all possible
             combinations of categories, thus enabling statistically
             efficient estimation and principled rating
             calibration.},
   Doi = {10.1145/2792838.2799683},
   Key = {fds322552}
}

@article{fds322550,
   Author = {Guhaniyogi, R and Dunson, DB},
   Title = {Bayesian Compressed Regression},
   Journal = {Journal of the American Statistical Association},
   Volume = {110},
   Number = {512},
   Pages = {1500-1514},
   Publisher = {Informa UK Limited},
   Year = {2015},
   Month = {October},
   url = {http://dx.doi.org/10.1080/01621459.2014.969425},
   Abstract = {© 2015, © American Statistical Association. As an
             alternative to variable selection or shrinkage in
             high-dimensional regression, we propose to randomly compress
             the predictors prior to analysis. This dramatically reduces
             storage and computational bottlenecks, performing well when
             the predictors can be projected to a low-dimensional linear
             subspace with minimal loss of information about the
             response. As opposed to existing Bayesian dimensionality
             reduction approaches, the exact posterior distribution
             conditional on the compressed data is available
             analytically, speeding up computation by many orders of
             magnitude while also bypassing robustness issues due to
             convergence and mixing problems with MCMC. Model averaging
             is used to reduce sensitivity to the random projection
             matrix, while accommodating uncertainty in the subspace
             dimension. Strong theoretical support is provided for the
             approach by showing near parametric convergence rates for
             the predictive density in the large p small n asymptotic
             paradigm. Practical performance relative to competitors is
             illustrated in simulations and real data
             applications.},
   Doi = {10.1080/01621459.2014.969425},
   Key = {fds322550}
}
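
A minimal sketch of the compress-then-regress idea, assuming a
Gaussian random projection and a conjugate Gaussian prior so that the
posterior for the compressed coefficients is available in closed form;
the paper's exact projection distribution, prior, and model averaging
over projections are not reproduced here.

    # Compressed Bayesian linear regression: project p predictors down to
    # m << p, then use the analytic conjugate posterior on the projection.
    import numpy as np

    rng = np.random.default_rng(2)
    n, p, m = 100, 1000, 20
    X = rng.normal(size=(n, p))
    beta = np.zeros(p); beta[:5] = 1.0
    y = X @ beta + rng.normal(size=n)

    Phi = rng.normal(size=(m, p)) / np.sqrt(m)      # random projection
    Xc = X @ Phi.T                                  # compressed design (n, m)

    tau2, sigma2 = 10.0, 1.0                        # prior and noise variances
    Sigma = np.linalg.inv(Xc.T @ Xc / sigma2 + np.eye(m) / tau2)
    mu = Sigma @ Xc.T @ y / sigma2                  # posterior mean

    print(np.corrcoef(y, Xc @ mu)[0, 1])            # fitted vs observed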

@article{fds322546,
   Author = {Fox, EB and Dunson, DB and Airoldi, EM},
   Title = {Bayesian nonparametric covariance regression},
   Journal = {Journal of Machine Learning Research},
   Volume = {16},
   Pages = {2501-2542},
   Year = {2015},
   Month = {December},
   Abstract = {© 2015 Emily B. Fox and David B. Dunson. Capturing
             predictor-dependent correlations amongst the elements of a
             multivariate response vector is fundamental to numerous
             applied domains, including neuroscience, epidemiology, and
             finance. Although there is a rich literature on methods for
             allowing the variance in a univariate regression model to
             vary with predictors, relatively little has been done in the
             multivariate case. As a motivating example, we consider the
             Google Flu Trends data set, which provides indirect
             measurements of influenza incidence at a large set of
             locations over time (our predictor). To accurately
             characterize temporally evolving influenza incidence across
             regions, it is important to develop statistical methods for
             a time-varying covariance matrix. Importantly, the locations
             provide a redundant set of measurements and do not yield a
             sparse nor static spatial dependence structure. We propose
             to reduce dimensionality and induce a flexible Bayesian
             nonparametric covariance regression model by relating these
             location-specific trajectories to a lower-dimensional
             subspace through a latent factor model with
             predictor-dependent factor loadings. These loadings are in
             terms of a collection of basis functions that vary
             nonparametrically over the predictor space. Such low-rank
             approximations are in contrast to sparse precision
             assumptions, and are appropriate in a wide range of
             applications. Our formulation aims to address three
             challenges: scaling to large p domains, coping with missing
             values, and allowing an irregular grid of observations. The
             model is shown to be highly flexible, while leading to a
             computationally feasible implementation via Gibbs sampling.
             The ability to scale to large p domains and cope with
             missing values is fundamental in analyzing the Google Flu
             Trends data.},
   Key = {fds322546}
}
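
A minimal sketch of the core construction described above: loadings
Lambda(x) built from basis functions of the predictor induce a
covariance Sigma(x) = Lambda(x)Lambda(x)' + s0^2 I that varies
smoothly with x. The sine basis, dimensions, and constants are
illustrative assumptions.

    # Predictor-dependent covariance via a latent factor model with
    # basis-expanded loadings: Sigma(x) = Lambda(x) Lambda(x)' + s0^2 I.
    import numpy as np

    rng = np.random.default_rng(3)
    p, k, n_basis = 5, 2, 4                 # response dim, factors, bases
    Theta = rng.normal(size=(p, k, n_basis))

    def loadings(x):
        b = np.array([np.sin(j * x) for j in range(1, n_basis + 1)])
        return Theta @ b                    # p x k loadings at predictor x

    def covariance(x, s0=0.5):
        L = loadings(x)
        return L @ L.T + s0 ** 2 * np.eye(p)

    for x in (0.5, 1.0, 2.0):
        print(x, np.round(np.diag(covariance(x)), 2))   # variances evolve in x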

@article{fds322547,
   Author = {Yazdani, A and Dunson, DB},
   Title = {A hybrid bayesian approach for genome-wide association
             studies on related individuals.},
   Journal = {Bioinformatics (Oxford, England)},
   Volume = {31},
   Number = {24},
   Pages = {3890-3896},
   Year = {2015},
   Month = {December},
   url = {http://dx.doi.org/10.1093/bioinformatics/btv496},
   Abstract = {Both single marker and simultaneous analysis face challenges
             in GWAS due to the large number of markers genotyped for a
             small number of subjects. This large p small n problem is
             particularly challenging when the trait under investigation
             has low heritability. In this article, we propose a two-stage
             approach that is a hybrid method of single and simultaneous
             analysis designed to improve genomic prediction of complex
             traits. In the first stage, we use a Bayesian independent
             screening method to select the most promising SNPs. In the
             second stage, we rely on a hierarchical model to analyze the
             joint impact of the selected markers. The model is designed
             to take into account familial dependence in the different
             subjects, while using local-global shrinkage priors on the
             marker effects. We evaluate the performance in simulation
             studies, and consider an application to animal breeding
             data. The illustrative data analysis reveals an encouraging
             result in terms of prediction performance and computational
             cost.},
   Doi = {10.1093/bioinformatics/btv496},
   Key = {fds322547}
}

@article{fds322548,
   Author = {Lock, EF and Dunson, DB},
   Title = {Shared kernel Bayesian screening.},
   Journal = {Biometrika},
   Volume = {102},
   Number = {4},
   Pages = {829-842},
   Year = {2015},
   Month = {December},
   url = {http://dx.doi.org/10.1093/biomet/asv032},
   Abstract = {This article concerns testing for equality of distribution
             between groups. We focus on screening variables with shared
             distributional features such as common support, modes and
             patterns of skewness. We propose a Bayesian testing method
             using kernel mixtures, which improves performance by
             borrowing information across the different variables and
             groups through shared kernels and a common probability of
             group differences. The inclusion of shared kernels in a
             finite mixture, with Dirichlet priors on the weights, leads
             to a simple framework for testing that scales well for
             high-dimensional data. We provide closed asymptotic forms
             for the posterior probability of equivalence in two groups
             and prove consistency under model misspecification. The
             method is applied to DNA methylation array data from a
             breast cancer study, and compares favourably to competitors
             when Type I error is estimated via permutation.},
   Doi = {10.1093/biomet/asv032},
   Key = {fds322548}
}

@article{fds322549,
   Author = {Bhattacharya, A and Pati, D and Pillai, NS and Dunson,
             DB},
   Title = {Dirichlet-Laplace priors for optimal shrinkage.},
   Journal = {Journal of the American Statistical Association},
   Volume = {110},
   Number = {512},
   Pages = {1479-1490},
   Year = {2015},
   Month = {December},
   url = {http://dx.doi.org/10.1080/01621459.2014.960967},
   Abstract = {Penalized regression methods, such as L1 regularization, are
             routinely used in high-dimensional applications, and there
             is a rich literature on optimality properties under sparsity
             assumptions. In the Bayesian paradigm, sparsity is routinely
             induced through two-component mixture priors having a
             probability mass at zero, but such priors encounter daunting
             computational problems in high dimensions. This has
             motivated continuous shrinkage priors, which can be
             expressed as global-local scale mixtures of Gaussians,
             facilitating computation. In contrast to the frequentist
             literature, little is known about the properties of such
             priors and the convergence and concentration of the
             corresponding posterior distribution. In this article, we
             propose a new class of Dirichlet-Laplace priors, which
             possess optimal posterior concentration and lead to
             efficient posterior computation. Finite sample performance
             of Dirichlet-Laplace priors relative to alternatives is
             assessed in simulated and real data examples.},
   Doi = {10.1080/01621459.2014.960967},
   Key = {fds322549}
}
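
For concreteness, one way to draw from a Dirichlet-Laplace-type prior,
using the standard representation of the double exponential as a
Gaussian scale mixture; the hyperparameter values are illustrative
assumptions, and the paper should be consulted for recommended
choices.

    # Draw from a Dirichlet-Laplace-style shrinkage prior:
    # theta_j | phi, tau ~ DE(phi_j * tau), phi ~ Dirichlet(a, ..., a).
    import numpy as np

    rng = np.random.default_rng(4)
    p, a = 1000, 0.5
    phi = rng.dirichlet(np.full(p, a))          # local weights, sum to 1
    tau = rng.gamma(shape=p * a, scale=2.0)     # global scale
    psi = rng.exponential(scale=2.0, size=p)    # DE as a Gaussian scale mixture
    theta = rng.normal(0.0, np.sqrt(psi) * phi * tau)

    # Many near-zero entries alongside a few large ones (heavy tails).
    print(np.quantile(np.abs(theta), [0.5, 0.99]), np.abs(theta).max())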

@article{fds328949,
   Author = {Chabout, J and Sarkar, A and Patel, SR and Radden, T and Dunson, DB and Fisher, SE and Jarvis, ED},
   Title = {A Foxp2 Mutation Implicated in Human Speech Deficits Alters
             Sequencing of Ultrasonic Vocalizations in Adult Male
             Mice.},
   Journal = {Frontiers in Behavioral Neuroscience},
   Volume = {10},
   Pages = {197},
   Year = {2016},
   url = {http://dx.doi.org/10.3389/fnbeh.2016.00197},
   Abstract = {Development of proficient spoken language skills is
             disrupted by mutations of the FOXP2 transcription factor. A
             heterozygous missense mutation in the KE family causes
             speech apraxia, involving difficulty producing words with
             complex learned sequences of syllables. Manipulations in
             songbirds have helped to elucidate the role of this gene in
             vocal learning, but findings in non-human mammals have been
             limited or inconclusive. Here, we performed a systematic
             study of ultrasonic vocalizations (USVs) of adult male mice
             carrying the KE family mutation. Using novel statistical
             tools, we found that Foxp2 heterozygous mice did not have
             detectable changes in USV syllable acoustic structure, but
             produced shorter sequences and did not shift to more complex
             syntax in social contexts where wildtype animals did.
             Heterozygous mice also displayed a shift in the position of
             their rudimentary laryngeal motor cortex (LMC) layer-5
             neurons. Our findings indicate that although mouse USVs are
             mostly innate, the underlying contributions of FoxP2 to
             sequencing of vocalizations are conserved with
             humans.},
   Doi = {10.3389/fnbeh.2016.00197},
   Key = {fds328949}
}

@article{fds322545,
   Author = {Wang, X and Dunson, D and Leng, C},
   Title = {No penalty no tears: Least squares in high-dimensional
             linear models},
   Journal = {33rd International Conference on Machine Learning, Icml
             2016},
   Volume = {4},
   Pages = {2685-2706},
   Year = {2016},
   Month = {January},
   ISBN = {9781510829008},
   Abstract = {© 2016 by the author(s). Ordinary least squares (OLS) is
             the default method for fitting linear models, but is not
             applicable for problems with dimensionality larger than the
             sample size. For these problems, we advocate the use of a
             generalized version of OLS motivated by ridge regression,
             and propose two novel three-step algorithms involving least
             squares fitting and hard thresholding. The algorithms are
             methodologically simple to understand intuitively,
             computationally easy to implement efficiently, and
             theoretically appealing for choosing models consistently.
             Numerical exercises comparing our methods with
             penalization-based approaches in simulations and data
             analyses illustrate the great potential of the proposed
             algorithms.},
   Key = {fds322545}
}
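
One plausible reading of the three-step recipe (a ridge-motivated
generalized OLS fit for p > n, hard thresholding, then an ordinary
least squares refit on the survivors); the ridge constant and the
number of retained coefficients are illustrative assumptions, not the
paper's tuned choices.

    # Three-step sketch: generalized OLS via X'(XX' + rI)^{-1} y,
    # hard thresholding, then OLS refit on the selected columns.
    import numpy as np

    rng = np.random.default_rng(5)
    n, p = 100, 500
    X = rng.normal(size=(n, p))
    beta = np.zeros(p); beta[:4] = [3.0, -2.0, 1.5, 2.5]
    y = X @ beta + rng.normal(size=n)

    r = 1.0
    beta_tilde = X.T @ np.linalg.solve(X @ X.T + r * np.eye(n), y)

    keep = np.argsort(np.abs(beta_tilde))[-10:]     # hard threshold: top 10
    sol, *_ = np.linalg.lstsq(X[:, keep], y, rcond=None)

    beta_hat = np.zeros(p)
    beta_hat[keep] = sol
    print(sorted(int(j) for j in keep))             # should include 0, 1, 2, 3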

@article{fds327031,
   Author = {Wang, X and Dunson, D and Leng, C},
   Title = {DECOrrelated feature space partitioning for distributed
             sparse regression},
   Journal = {Advances in Neural Information Processing
             Systems},
   Pages = {802-810},
   Year = {2016},
   Month = {January},
   Abstract = {© 2016 NIPS Foundation - All Rights Reserved. Fitting
             statistical models is computationally challenging when the
             sample size or the dimension of the dataset is huge. An
             attractive approach for down-scaling the problem size is to
             first partition the dataset into subsets and then fit using
             distributed algorithms. The dataset can be partitioned
             either horizontally (in the sample space) or vertically (in
             the feature space). While the majority of the literature
             focuses on sample space partitioning, feature space
             partitioning is more effective when p > n. Existing methods
             for partitioning features, however, are either vulnerable to
             high correlations or inefficient in reducing the model
             dimension. In this paper, we solve these problems through a
             new embarrassingly parallel framework named DECO for
             distributed variable selection and parameter estimation. In
             DECO, variables are first partitioned and allocated to m
             distributed workers. The decorrelated subset data within
             each worker are then fitted via any algorithm designed for
             high-dimensional problems. We show that by incorporating the
             decorrelation step, DECO can achieve consistent variable
             selection and parameter estimation on each subset with
             (almost) no assumptions. In addition, the convergence rate
             is nearly minimax optimal for both sparse and weakly sparse
             models and does NOT depend on the partition number m.
             Extensive numerical experiments are provided to illustrate
             the performance of the new framework.},
   Key = {fds327031}
}
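
A sketch of the decorrelation idea, assuming the standardization takes
the form of premultiplying the data by (XX'/p + rI)^(-1/2) so that
feature blocks assigned to different workers become nearly orthogonal;
the constants here may differ from the exact DECO standardization in
the paper.

    # Decorrelation sketch: whitening the rows makes the columns
    # (features), which will be split across workers, nearly uncorrelated.
    import numpy as np

    rng = np.random.default_rng(6)
    n, p = 50, 200
    common = rng.normal(size=(n, 1))                # shared latent factor
    X = rng.normal(size=(n, p)) + common            # highly correlated features

    G = X @ X.T / p + 0.01 * np.eye(n)
    w, V = np.linalg.eigh(G)
    F = V @ np.diag(w ** -0.5) @ V.T                # (XX'/p + rI)^(-1/2)
    X_dec = F @ X

    off = ~np.eye(p, dtype=bool)
    print(np.abs(np.corrcoef(X.T)[off]).mean(),     # before: large
          np.abs(np.corrcoef(X_dec.T)[off]).mean()) # after: much smaller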

@article{fds322541,
   Author = {Zhou, J and Herring, AH and Bhattacharya, A and Olshan, AF and Dunson,
             DB and National Birth Defects Prevention Study},
   Title = {Nonparametric Bayes modeling for case control studies with
             many predictors.},
   Journal = {Biometrics},
   Volume = {72},
   Number = {1},
   Pages = {184-192},
   Year = {2016},
   Month = {March},
   url = {http://dx.doi.org/10.1111/biom.12411},
   Abstract = {It is common in biomedical research to run case-control
             studies involving high-dimensional predictors, with the main
             goal being detection of the sparse subset of predictors
             having a significant association with disease. Usual
             analyses rely on independent screening, considering each
             predictor one at a time, or in some cases on logistic
             regression assuming no interactions. We propose a
             fundamentally different approach based on a nonparametric
             Bayesian low rank tensor factorization model for the
             retrospective likelihood. Our model allows a very flexible
             structure in characterizing the distribution of multivariate
             variables as unknown and without any linear assumptions as
             in logistic regression. Predictors are excluded only if they
             have no impact on disease risk, either directly or through
             interactions with other predictors. Hence, we obtain an
             omnibus approach for screening for important predictors.
             Computation relies on an efficient Gibbs sampler. The
             methods are shown to have high power and low false discovery
             rates in simulation studies, and we consider an application
             to an epidemiology study of birth defects.},
   Doi = {10.1111/biom.12411},
   Key = {fds322541}
}

@article{fds322542,
   Author = {Tang, K and Dunson, DB and Su, Z and Liu, R and Zhang, J and Dong,
             J},
   Title = {Subspace segmentation by dense block and sparse
             representation.},
   Journal = {Neural Networks : the Official Journal of the International
             Neural Network Society},
   Volume = {75},
   Pages = {66-76},
   Year = {2016},
   Month = {March},
   url = {http://dx.doi.org/10.1016/j.neunet.2015.11.011},
   Abstract = {Subspace segmentation is a fundamental topic in computer
             vision and machine learning. However, the success of many
             popular methods is about independent subspace segmentation
             instead of the more flexible and realistic disjoint subspace
             segmentation. Focusing on the disjoint subspaces, we provide
             theoretical and empirical evidence of inferior performance
             for popular algorithms such as LRR. To solve these problems,
             we propose a novel dense block and sparse representation
             (DBSR) for subspace segmentation and provide related
             theoretical results. DBSR minimizes a combination of the
             ℓ1,1-norm and maximum singular value of the representation
             matrix, leading to a combination of dense block and
             sparsity. We provide experimental results for synthetic and
             benchmark data showing that our method can outperform the
             state-of-the-art.},
   Doi = {10.1016/j.neunet.2015.11.011},
   Key = {fds322542}
}

@article{fds322540,
   Author = {Yang, Y and Dunson, DB},
   Title = {Bayesian manifold regression},
   Journal = {The Annals of Statistics},
   Volume = {44},
   Number = {2},
   Pages = {876-905},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2016},
   Month = {April},
   url = {http://dx.doi.org/10.1214/15-AOS1390},
   Abstract = {© Institute of Mathematical Statistics, 2016. There is
             increasing interest in the problem of nonparametric
             regression with high-dimensional predictors. When the number
             of predictors D is large, one encounters a daunting problem
             in attempting to estimate a D-dimensional surface based on
             limited data. Fortunately, in many applications, the support
             of the data is concentrated on a d-dimensional subspace with
             d ≪ D. Manifold learning attempts to estimate this
             subspace. Our focus is on developing computationally
             tractable and theoretically supported Bayesian nonparametric
             regression methods in this context. When the subspace
             corresponds to a locally-Euclidean compact Riemannian
             manifold, we show that a Gaussian process regression
             approach can be applied that leads to the minimax optimal
             adaptive rate in estimating the regression function under
             some conditions. The proposed model bypasses the need to
             estimate the manifold, and can be implemented using standard
             algorithms for posterior computation in Gaussian processes.
             Finite sample performance is illustrated in a data analysis
             example.},
   Doi = {10.1214/15-AOS1390},
   Key = {fds322540}
}
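
The point of the paper is that plain Gaussian process regression can
adapt to a low-dimensional manifold without estimating it. Below is a
minimal GP regression sketch in that setting (squared-exponential
kernel, plug-in hyperparameters, inputs near a circle embedded in
R^10); every choice is an illustrative assumption.

    # GP regression on inputs lying near a one-dimensional manifold
    # (a circle) embedded in R^10; the manifold is never estimated.
    import numpy as np

    rng = np.random.default_rng(7)

    def sqexp(A, B, ls=0.5):
        d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
        return np.exp(-0.5 * d2 / ls ** 2)

    t = rng.uniform(0, 2 * np.pi, 50)
    noise_dims = 0.01 * rng.normal(size=(50, 8))    # tiny off-manifold noise
    X = np.column_stack([np.cos(t), np.sin(t), noise_dims])
    y = np.sin(2 * t) + 0.1 * rng.normal(size=50)

    K = sqexp(X, X) + 0.1 ** 2 * np.eye(50)         # kernel + noise variance
    alpha = np.linalg.solve(K, y)

    x_star = np.zeros((1, 10))
    x_star[0, :2] = np.cos(1.0), np.sin(1.0)        # a point on the circle
    print((sqexp(x_star, X) @ alpha)[0], np.sin(2.0))   # prediction vs truth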

@article{fds329117,
   Author = {Yang, Y and Dunson, DB},
   Title = {Bayesian Conditional Tensor Factorizations for
             High-Dimensional Classification},
   Journal = {Journal of the American Statistical Association},
   Volume = {111},
   Number = {514},
   Pages = {656-669},
   Publisher = {Informa UK Limited},
   Year = {2016},
   Month = {April},
   url = {http://dx.doi.org/10.1080/01621459.2015.1029129},
   Abstract = {© 2016, © American Statistical Association. In many
             application areas, data are collected on a categorical
             response and high-dimensional categorical predictors, with
             the goals being to build a parsimonious model for
             classification while doing inferences on the important
             predictors. In settings such as genomics, there can be
             complex interactions among the predictors. By using a
             carefully structured Tucker factorization, we define a model
             that can characterize any conditional probability, while
             facilitating variable selection and modeling of higher-order
             interactions. Following a Bayesian approach, we propose a
             Markov chain Monte Carlo algorithm for posterior computation
             accommodating uncertainty in the predictors to be included.
             Under near low-rank assumptions, the posterior distribution
             for the conditional probability is shown to achieve close to
             the parametric rate of contraction even in ultra
             high-dimensional settings. The methods are illustrated using
             simulation examples and biomedical applications.
             Supplementary materials for this article are available
             online.},
   Doi = {10.1080/01621459.2015.1029129},
   Key = {fds329117}
}

@article{fds322539,
   Author = {Kabisa, S and Dunson, DB and Morris, JS},
   Title = {Online Variational Bayes Inference for High-Dimensional
             Correlated Data},
   Journal = {Journal of Computational and Graphical Statistics : a Joint
             Publication of American Statistical Association, Institute
             of Mathematical Statistics, Interface Foundation of North
             America},
   Volume = {25},
   Number = {2},
   Pages = {426-444},
   Publisher = {Informa UK Limited},
   Year = {2016},
   Month = {April},
   url = {http://dx.doi.org/10.1080/10618600.2014.998336},
   Abstract = {© 2016 American Statistical Association, Institute of
             Mathematical Statistics, and Interface Foundation of North
             America. High-dimensional data with hundreds of thousands of
             observations are becoming commonplace in many disciplines.
             The analysis of such data poses many computational
             challenges, especially when the observations are correlated
             over time and/or across space. In this article, we propose
             flexible hierarchical regression models for analyzing such
             data that accommodate serial and/or spatial correlation. We
             address the computational challenges involved in fitting
             these models by adopting an approximate inference framework.
             We develop an online variational Bayes algorithm that works
             by incrementally reading the data into memory one portion at
             a time. The performance of the method is assessed through
             simulation studies. The methodology is applied to analyze
             signal intensity in MRI images of subjects with knee
             osteoarthritis, using data from the Osteoarthritis
             Initiative. Supplementary materials for this article are
             available online.},
   Doi = {10.1080/10618600.2014.998336},
   Key = {fds322539}
}
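
The essential pattern is incremental: read one portion of the data,
update the approximate posterior, discard the portion. A toy analogue
using conjugate Gaussian-mean updates (not the paper's variational
algorithm or its hierarchical model):

    # Streaming conjugate updates, one chunk at a time (toy analogue of
    # reading the data into memory one portion at a time).
    import numpy as np

    rng = np.random.default_rng(8)
    stream = np.array_split(rng.normal(3.0, 1.0, 100000), 100)

    prec, mean = 1e-2, 0.0                  # N(0, 100) prior on the mean
    for chunk in stream:                    # each chunk is seen only once
        new_prec = prec + len(chunk)        # unit observation variance
        mean = (prec * mean + chunk.sum()) / new_prec
        prec = new_prec

    print(mean, prec ** -0.5)               # posterior mean and sd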

@article{fds329994,
   Author = {Ovaskainen, O and Abrego, N and Halme, P and Dunson,
             D},
   Title = {Using latent variable models to identify large networks of
             species-to-species associations at different spatial
             scales},
   Journal = {Methods in Ecology and Evolution},
   Volume = {7},
   Number = {5},
   Pages = {549-555},
   Publisher = {WILEY},
   Editor = {Warton, D},
   Year = {2016},
   Month = {May},
   url = {http://dx.doi.org/10.1111/2041-210X.12501},
   Abstract = {© 2015 The Authors. We present a hierarchical latent
             variable model that partitions variation in species
             occurrences and co-occurrences simultaneously at multiple
             spatial scales. We illustrate how the parameterized model
             can be used to predict the occurrences of a species by using
             as predictors not only the environmental covariates, but
             also the occurrences of all other species, at all spatial
             scales. We leverage recent progress in Bayesian latent
             variable models to implement a computationally effective
             algorithm that enables one to consider large communities and
             extensive sampling schemes. We exemplify the framework with
             a community of 98 fungal species sampled in c. 22 500 dead
             wood units in 230 plots in 29 beech forests. The networks
             identified by correlations and partial correlations were
             consistent, as were networks for natural and managed
             forests, but networks at different spatial scales were
             dissimilar. Accounting for the occurrences of the other
             species roughly doubled the predictive powers of the models
             compared to accounting for environmental covariates
             only.},
   Doi = {10.1111/2041-210X.12501},
   Key = {fds329994}
}

@article{fds322538,
   Author = {Guhaniyogi, R and Dunson, DB},
   Title = {Compressed Gaussian process for manifold
             regression},
   Journal = {Journal of Machine Learning Research},
   Volume = {17},
   Year = {2016},
   Month = {May},
   Abstract = {©2016 Rajarshi Guhaniyogi and David B. Dunson.
             Nonparametric regression for large numbers of features (p)
             is an increasingly important problem. If the sample size n
             is massive, a common strategy is to partition the feature
             space, and then separately apply simple models to each
             partition set. This is not ideal when n is modest relative
             to p, and we propose an alternative approach relying on
             random compression of the feature vector combined with
             Gaussian process regression. The proposed approach is
             particularly motivated by the setting in which the response
             is conditionally independent of the features given the
             projection to a low dimensional manifold. Conditionally on
             the random compression matrix and a smoothness parameter,
             the posterior distribution for the regression surface and
             posterior predictive distributions are available
             analytically. Running the analysis in parallel for many
             random compression matrices and smoothness parameters, model
             averaging is used to combine the results. The algorithm can
             be implemented rapidly even in very large p and moderately
             large n nonparametric regression, has strong theoretical
             justification, and is found to yield state of the art
             predictive performance.},
   Key = {fds322538}
}

@article{fds322536,
   Author = {Kunihama, T and Herring, AH and Halpern, CT and Dunson,
             DB},
   Title = {Nonparametric Bayes modeling with sample survey
             weights},
   Journal = {Statistics & Probability Letters},
   Volume = {113},
   Pages = {41-48},
   Publisher = {Elsevier BV},
   Year = {2016},
   Month = {June},
   url = {http://dx.doi.org/10.1016/j.spl.2016.02.009},
   Abstract = {© 2016 Elsevier B.V. In population studies, it is standard
             to sample data via designs in which the population is
             divided into strata, with the different strata assigned
             different probabilities of inclusion. Although there have
             been some proposals for including sample survey weights into
             Bayesian analyses, existing methods require complex models
             or ignore the stratified design underlying the survey
             weights. We propose a simple approach based on modeling the
             distribution of the selected sample as a mixture, with the
             mixture weights appropriately adjusted, while accounting for
             uncertainty in the adjustment. We focus for simplicity on
             Dirichlet process mixtures but the proposed approach can be
             applied more broadly. We sketch a simple Markov chain Monte
             Carlo algorithm for computation, and assess the approach via
             simulations and an application.},
   Doi = {10.1016/j.spl.2016.02.009},
   Key = {fds322536}
}
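
The core adjustment, reweighting the sample toward the population
using the survey weights, can be illustrated crudely with
inverse-probability weighting of an empirical mean; this is far
simpler than the mixture-model treatment in the paper, and the strata,
rates, and sizes below are invented for illustration.

    # Stratified sampling over-represents stratum 1; inverse-probability
    # weights recover the population mean from the biased sample.
    import numpy as np

    rng = np.random.default_rng(9)
    # Population: stratum 0 ~ N(0, 1), 9000 units; stratum 1 ~ N(5, 1),
    # 1000 units; population mean is 0.9 * 0 + 0.1 * 5 = 0.5.
    n0, n1 = 200, 200                       # equal sample sizes by design
    w0, w1 = 9000 / n0, 1000 / n1           # inverse inclusion probabilities
    y = np.concatenate([rng.normal(0, 1, n0), rng.normal(5, 1, n1)])
    w = np.concatenate([np.full(n0, w0), np.full(n1, w1)])

    print(y.mean(), np.average(y, weights=w))   # biased (~2.5) vs ~0.5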

@article{fds322537,
   Author = {Rao, V and Lin, L and Dunson, DB},
   Title = {Data augmentation for models based on rejection
             sampling.},
   Journal = {Biometrika},
   Volume = {103},
   Number = {2},
   Pages = {319-335},
   Year = {2016},
   Month = {June},
   url = {http://dx.doi.org/10.1093/biomet/asw005},
   Abstract = {We present a data augmentation scheme to perform Markov
             chain Monte Carlo inference for models where data generation
             involves a rejection sampling algorithm. Our idea is a
             simple scheme to instantiate the rejected proposals
             preceding each data point. The resulting joint probability
             over observed and rejected variables can be much simpler
             than the marginal distribution over the observed variables,
             which often involves intractable integrals. We consider
             three problems: modelling flow-cytometry measurements
             subject to truncation; the Bayesian analysis of the matrix
             Langevin distribution on the Stiefel manifold; and Bayesian
             inference for a nonparametric Gaussian process density
             model. The latter two are instances of doubly-intractable
             Markov chain Monte Carlo problems, where evaluating the
             likelihood is intractable. Our experiments demonstrate
             superior performance over state-of-the-art sampling
             algorithms for such problems.},
   Doi = {10.1093/biomet/asw005},
   Key = {fds322537}
}
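
A minimal sketch of the augmentation idea for a toy rejection sampler
(standard normal proposals, target truncated to [0, ∞)): alongside
each accepted draw, the rejected proposals preceding it are
instantiated, so the joint over accepted and rejected variables stays
simple even when the marginal over accepted draws is awkward. The toy
target is an assumption for illustration.

    # Instantiate the rejected proposals preceding each accepted draw.
    import numpy as np

    rng = np.random.default_rng(10)

    def rejection_draw_with_history():
        rejected = []
        while True:
            x = rng.normal()
            if x >= 0:               # acceptance region of the truncated normal
                return x, rejected
            rejected.append(x)       # record the rejected proposal

    for x, rej in (rejection_draw_with_history() for _ in range(5)):
        print(f"accepted {x:+.2f} after {len(rej)} rejections")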

@article{fds329118,
   Author = {Canale, A and Dunson, DB},
   Title = {Multiscale Bernstein polynomials for densities},
   Journal = {Statistica Sinica},
   Volume = {26},
   Number = {3},
   Pages = {1175-1195},
   Publisher = {Institute of Statistical Science},
   Year = {2016},
   Month = {July},
   url = {http://dx.doi.org/10.5705/ss.202015.0163},
   Abstract = {Our focus is on constructing a multiscale nonparametric
             prior for densities. The Bayes density estimation literature
             is dominated by single scale methods, with the exception of
             Polya trees, which favor overly-spiky densities even when
             the truth is smooth. We propose a multiscale Bernstein
             polynomial family of priors, which produce smooth
             realizations that do not rely on hard partitioning of the
             support. At each level in an infinitely-deep binary tree, we
             place a beta dictionary density; within a scale the
             densities are equivalent to Bernstein polynomials. Using a
             stick-breaking characterization, stochastically decreasing
             weights are allocated to the finer scale dictionary
             elements. A slice sampler is used for posterior computation,
             and properties are described. The method characterizes
             densities with locally-varying smoothness, and can produce a
             sequence of coarse to fine density estimates. An extension
             for Bayesian testing of group differences is introduced and
             applied to DNA methylation array data.},
   Doi = {10.5705/ss.202015.0163},
   Key = {fds329118}
}
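
A sketch of the stochastically decreasing allocation of weight across
scales via stick-breaking, truncated at a finite depth; hyperparameter
values are illustrative, and the full prior also distributes weight
within each scale across the binary tree.

    # Stick-breaking across scales: weights tend to decay with depth,
    # favouring coarse (smoother) dictionary elements.
    import numpy as np

    rng = np.random.default_rng(11)
    depth = 8
    v = rng.beta(1.0, 5.0, size=depth)                  # stick fractions
    remaining = np.concatenate(([1.0], np.cumprod(1.0 - v)[:-1]))
    scale_weights = v * remaining                       # mass at each scale
    # Remaining mass belongs to scales deeper than the truncation.
    print(np.round(scale_weights, 3), scale_weights.sum())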

@article{fds329993,
   Author = {Hultman, R and Mague, SD and Li, Q and Katz, BM and Michel, N and Lin, L and Wang, J and David, LK and Blount, C and Chandy, R and Carlson, D and Ulrich, K and Carin, L and Dunson, D and Kumar, S and Deisseroth, K and Moore, SD and Dzirasa, K},
   Title = {Dysregulation of Prefrontal Cortex-Mediated Slow-Evolving
             Limbic Dynamics Drives Stress-Induced Emotional
             Pathology.},
   Journal = {Neuron},
   Volume = {91},
   Number = {2},
   Pages = {439-452},
   Year = {2016},
   Month = {July},
   url = {http://dx.doi.org/10.1016/j.neuron.2016.05.038},
   Abstract = {Circuits distributed across cortico-limbic brain regions
             compose the networks that mediate emotional behavior. The
             prefrontal cortex (PFC) regulates ultraslow (<1 Hz)
             dynamics across these networks, and PFC dysfunction is
             implicated in stress-related illnesses including major
             depressive disorder (MDD). To uncover the mechanism whereby
             stress-induced changes in PFC circuitry alter emotional
             networks to yield pathology, we used a multi-disciplinary
             approach including in vivo recordings in mice and chronic
             social defeat stress. Our network model, inferred using
             machine learning, linked stress-induced behavioral pathology
             to the capacity of PFC to synchronize amygdala and VTA
             activity. Direct stimulation of PFC-amygdala circuitry with
             DREADDs normalized PFC-dependent limbic synchrony in
             stress-susceptible animals and restored normal behavior. In
             addition to providing insights into MDD mechanisms, our
             findings demonstrate an interdisciplinary approach that can
             be used to identify the large-scale network changes that
             underlie complex emotional pathologies and the specific
             network nodes that can be used to develop targeted
             interventions.},
   Doi = {10.1016/j.neuron.2016.05.038},
   Key = {fds329993}
}

@article{fds329116,
   Author = {Li, D and Heyer, L and Jennings, VH and Smith, CA and Dunson,
             DB},
   Title = {Personalised estimation of a woman's most fertile
             days.},
   Journal = {The European Journal of Contraception & Reproductive Health
             Care : the Official Journal of the European Society of
             Contraception},
   Volume = {21},
   Number = {4},
   Pages = {323-328},
   Year = {2016},
   Month = {August},
   url = {http://dx.doi.org/10.1080/13625187.2016.1196485},
   Abstract = {We propose a new, personalised approach to estimating a
             woman's most fertile days that only requires recording the
             first day of menses and can use a smartphone to convey this
             information to the user so that she can plan or prevent
             pregnancy. We performed a retrospective analysis of two
             cohort studies (a North Carolina-based study and the Early
             Pregnancy Study [EPS]) and a prospective multicentre trial
             (World Health Organization [WHO] study). The North Carolina
             study consisted of 68 sexually active women with either an
             intrauterine device or tubal ligation. The EPS comprised 221
             women who planned to become pregnant and had no known
             fertility problems. The WHO study consisted of 706 women
             from five geographically and culturally diverse settings.
             Bayesian statistical methods were used to design our
             proposed method, Dynamic Optimal Timing (DOT). Simulation
             studies were used to estimate the cumulative pregnancy
             risk. For the proposed method, simulation analyses indicated
             a 4.4% cumulative probability of pregnancy over 13 cycles
             with correct use. After a calibration window, this method
             flagged between 11 and 13 days when unprotected intercourse
             should be avoided per cycle. Eligible women should have
             cycle lengths between 20 and 40 days with a variability
             range less than or equal to 9 days. DOT can easily be
             implemented by computer or smartphone applications, allowing
             for women to make more informed decisions about their
             fertility. This approach is already incorporated into a
             patent-pending system and is available for free download on
             iPhones and Androids.},
   Doi = {10.1080/13625187.2016.1196485},
   Key = {fds329116}
}

@article{fds321837,
   Author = {Yin, R and Cornelis, B and Fodor, G and Ocon, N and Dunson, D and Daubechies, I},
   Title = {Removing cradle artifacts in X-ray images of
             paintings},
   Journal = {Siam Journal on Imaging Sciences},
   Volume = {9},
   Number = {3},
   Pages = {1247-1272},
   Publisher = {Society for Industrial & Applied Mathematics
             (SIAM)},
   Year = {2016},
   Month = {August},
   url = {http://dx.doi.org/10.1137/15M1053554},
   Abstract = {© 2016 Rujie Yin. We propose an algorithm that removes the
             visually unpleasant effects of cradling in X-ray images of
             panel paintings, with the goal of improving the X-ray image
             readability by art experts. The algorithm consists of three
             stages. In the first stage the location of the cradle is
             detected automatically and the grayscale inconsistency,
             caused by the thickness of the cradle, is corrected. In a
             second stage we use a method called morphological component
             analysis to separate the X-ray image into a so-called
             cartoon part and a texture part, where the latter contains
             mostly the wood grain from both the panel and the cradling.
             The algorithm next learns a Bayesian factor model that
             distinguishes between the texture patterns that originate
             from the cradle and those from other components such as the
             panel and/or the painting on the panel surface, and finally
             uses this to remove the textures associated with the cradle.
             We apply the algorithm to a number of historically important
             paintings on panel. We also show how it can be used to
             digitally remove stretcher artifacts from X-rays of
             paintings on canvas. We compare our results with those
             obtained manually by best current practices in art
             conservation as well as on a ground truth dataset,
             consisting of X-ray images of a painting before and after
             removal of the physically attached cradle.},
   Doi = {10.1137/15M1053554},
   Key = {fds321837}
}

@article{fds329114,
   Author = {Zhu, H and Strawn, N and Dunson, DB},
   Title = {Bayesian graphical models for multivariate functional
             data},
   Journal = {Journal of Machine Learning Research},
   Volume = {17},
   Pages = {1-27},
   Year = {2016},
   Month = {October},
   Abstract = {© 2016 Hongxiao Zhu, Nate Strawn, and David B. Dunson.
             Graphical models express conditional independence
             relationships among variables. Although methods for
             vector-valued data are well established, functional data
             graphical models remain underdeveloped. By functional data,
             we refer to data that are realizations of random functions
             varying over a continuum (e.g., images, signals). We
             introduce a notion of conditional independence between
             random functions, and construct a framework for Bayesian
             inference of undirected, decomposable graphs in the
             multivariate functional data context. This framework is
             based on extending Markov distributions and hyper Markov
             laws from random variables to random processes, providing a
             principled alternative to naive application of multivariate
             methods to discretized functional data. Markov properties
             facilitate the composition of likelihoods and priors
             according to the decomposition of a graph. Our focus is on
             Gaussian process graphical models using orthogonal basis
             expansions. We propose a hyper-inverse-Wishart-process prior
             for the covariance kernels of the infinite coefficient
             sequences of the basis expansion, and establish its
             existence and uniqueness. We also prove the strong hyper
             Markov property and the conjugacy of this prior under a
             finite rank condition of the prior kernel parameter.
             Stochastic search Markov chain Monte Carlo algorithms are
             developed for posterior inference, assessed through
             simulations, and applied to a study of brain activity and
             alcoholism.},
   Key = {fds329114}
}

@article{fds329115,
   Author = {Sarkar, A and Dunson, DB},
   Title = {Bayesian Nonparametric Modeling of Higher Order Markov
             Chains},
   Journal = {Journal of the American Statistical Association},
   Volume = {111},
   Number = {516},
   Pages = {1791-1803},
   Publisher = {Informa UK Limited},
   Year = {2016},
   Month = {October},
   url = {http://dx.doi.org/10.1080/01621459.2015.1115763},
   Abstract = {© 2016 American Statistical Association. We consider the
             problem of flexible modeling of higher order Markov chains
             when an upper bound on the order of the chain is known but
             the true order and nature of the serial dependence are
             unknown. We propose Bayesian nonparametric methodology based
             on conditional tensor factorizations, which can characterize
             any transition probability with a specified maximal order.
             The methodology selects the important lags and captures
             higher order interactions among the lags, while also
             facilitating calculation of Bayes factors for a variety of
             hypotheses of interest. We design efficient Markov chain
             Monte Carlo algorithms for posterior computation, allowing
             for uncertainty in the set of important lags to be included
             and in the nature and order of the serial dependence. The
             methods are illustrated using simulation experiments and
             real world applications. Supplementary materials for this
             article are available online.},
   Doi = {10.1080/01621459.2015.1115763},
   Key = {fds329115}
}
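
For orientation, a toy empirical version of the object being modeled,
the transition probability tensor of a higher-order chain, estimated
by raw counting with the maximal order fixed at two; the paper's
contribution is the nonparametric prior and lag selection, not this
naive estimate.

    # Empirical transition tensor p_hat[x_{t-2}, x_{t-1}, x_t] for a
    # binary chain whose true dependence is on lag 2 only.
    import numpy as np

    rng = np.random.default_rng(12)
    x = (rng.uniform(size=5000) < 0.5).astype(int)
    for t in range(2, len(x)):
        if rng.uniform() < 0.8:
            x[t] = x[t - 2]                 # copy the lag-2 state

    counts = np.zeros((2, 2, 2))
    for a, b, c in zip(x[:-2], x[1:-1], x[2:]):
        counts[a, b, c] += 1
    p_hat = counts / counts.sum(axis=2, keepdims=True)
    print(np.round(p_hat[:, :, 1], 2))      # depends on lag-2 state, not lag-1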

@article{fds329112,
   Author = {Bhattacharya, A and Dunson, DB and Pati, D and Pillai,
             NS},
   Title = {Sub-optimality of some continuous shrinkage
             priors},
   Journal = {Stochastic Processes and Their Applications},
   Volume = {126},
   Number = {12},
   Pages = {3828-3842},
   Publisher = {Elsevier BV},
   Year = {2016},
   Month = {December},
   url = {http://dx.doi.org/10.1016/j.spa.2016.08.007},
   Abstract = {© 2016 Two-component mixture priors provide a traditional
             way to induce sparsity in high-dimensional Bayes models.
             However, several aspects of such a prior, including
             computational complexities in high-dimensions,
             interpretation of exact zeros and non-sparse posterior
             summaries under standard loss functions, have motivated an
             amazing variety of continuous shrinkage priors, which can be
             expressed as global–local scale mixtures of Gaussians.
             Interestingly, we demonstrate that many commonly used
             shrinkage priors, including the Bayesian Lasso, do not have
             adequate posterior concentration in high-dimensional
             settings.},
   Doi = {10.1016/j.spa.2016.08.007},
   Key = {fds329112}
}

@article{fds329113,
   Author = {Durante, D and Dunson, DB},
   Title = {Locally adaptive dynamic networks},
   Journal = {The Annals of Applied Statistics},
   Volume = {10},
   Number = {4},
   Pages = {2203-2232},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2016},
   Month = {December},
   url = {http://dx.doi.org/10.1214/16-AOAS971},
   Abstract = {© Institute of Mathematical Statistics, 2016. Our focus is
             on realistically modeling and forecasting dynamic networks
             of face-to-face contacts among individuals. Important
             aspects of such data that lead to problems with current
             methods include the tendency of the contacts to move between
             periods of slow and rapid changes, and the dynamic
             heterogeneity in the actors’ connectivity behaviors.
             Motivated by this application, we develop a novel method for
             Locally Adaptive DYnamic (LADY) network inference. The
             proposed model relies on a dynamic latent space
             representation in which each actor’s position evolves in
             time via stochastic differential equations. Using a
             state-space representation for these stochastic processes
             and Pólya-gamma data augmentation, we develop an efficient
             MCMC algorithm for posterior inference along with tractable
             procedures for online updating and forecasting of future
             networks. We evaluate performance in simulation studies, and
             consider an application to face-to-face contacts among
             individuals in a primary school.},
   Doi = {10.1214/16-AOAS971},
   Key = {fds329113}
}

@article{fds327030,
   Author = {Datta, J and Dunson, DB},
   Title = {Bayesian inference on quasi-sparse count
             data.},
   Journal = {Biometrika},
   Volume = {103},
   Number = {4},
   Pages = {971-983},
   Year = {2016},
   Month = {December},
   url = {http://dx.doi.org/10.1093/biomet/asw053},
   Abstract = {There is growing interest in analysing high-dimensional
             count data, which often exhibit quasi-sparsity corresponding
             to an overabundance of zeros and small nonzero counts.
             Existing methods for analysing multivariate count data via
             Poisson or negative binomial log-linear hierarchical models
             with zero-inflation cannot flexibly adapt to quasi-sparse
             settings. We develop a new class of continuous local-global
             shrinkage priors tailored to quasi-sparse counts.
             Theoretical properties are assessed, including flexible
             posterior concentration and stronger control of false
             discoveries in multiple testing. Simulation studies
             demonstrate excellent small-sample properties relative to
             competing methods. We use the method to detect rare
             mutational hotspots in exome sequencing data and to identify
             North American cities most impacted by terrorism.},
   Doi = {10.1093/biomet/asw053},
   Key = {fds327030}
}

@article{fds325339,
   Author = {Johndrow, JE and Bhattacharya, A and Dunson, DB},
   Title = {Tensor decompositions and sparse log-linear
             models.},
   Journal = {The Annals of Statistics},
   Volume = {45},
   Number = {1},
   Pages = {1-38},
   Year = {2017},
   Month = {January},
   url = {http://dx.doi.org/10.1214/15-AOS1414},
   Abstract = {Contingency table analysis routinely relies on log-linear
             models, with latent structure analysis providing a common
             alternative. Latent structure models lead to a reduced rank
             tensor factorization of the probability mass function for
             multivariate categorical data, while log-linear models
             achieve dimensionality reduction through sparsity. Little is
             known about the relationship between these notions of
             dimensionality reduction in the two paradigms. We derive
             several results relating the support of a log-linear model
             to nonnegative ranks of the associated probability tensor.
             Motivated by these findings, we propose a new collapsed
             Tucker class of tensor decompositions, which bridge existing
             PARAFAC and Tucker decompositions, providing a more flexible
             framework for parsimoniously characterizing multivariate
             categorical data. Taking a Bayesian approach to inference,
             we illustrate empirical advantages of the new
             decompositions.},
   Doi = {10.1214/15-AOS1414},
   Key = {fds325339}
}

@article{fds326570,
   Author = {Lin, L and St Thomas, B and Zhu, H and Dunson, DB},
   Title = {Extrinsic local regression on manifold-valued
             data.},
   Journal = {Journal of the American Statistical Association},
   Volume = {112},
   Number = {519},
   Pages = {1261-1273},
   Year = {2017},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2016.1208615},
   Abstract = {We propose an extrinsic regression framework for modeling
             data with manifold valued responses and Euclidean
             predictors. Regression with manifold responses has wide
             applications in shape analysis, neuroscience, medical
             imaging and many other areas. Our approach embeds the
             manifold where the responses lie onto a higher dimensional
             Euclidean space, obtains a local regression estimate in that
             space, and then projects this estimate back onto the image
             of the manifold. Outside the regression setting both
             intrinsic and extrinsic approaches have been proposed for
             modeling i.i.d. manifold-valued data. However, to our
             knowledge our work is the first to take an extrinsic
             approach to the regression problem. The proposed extrinsic
             regression framework is general, computationally efficient
             and theoretically appealing. Asymptotic distributions and
             convergence rates of the extrinsic regression estimates are
             derived, and a large class of examples is considered,
             indicating the wide applicability of our
             approach.},
   Doi = {10.1080/01621459.2016.1208615},
   Key = {fds326570}
}

@article{fds341600,
   Author = {Dunson, D and Fryzlewicz, P},
   Title = {Report of the editors-2016},
   Journal = {Journal of the Royal Statistical Society: Series B
             (Statistical Methodology)},
   Volume = {79},
   Number = {1},
   Pages = {3-4},
   Year = {2017},
   Month = {January},
   url = {http://dx.doi.org/10.1111/rssb.12220},
   Doi = {10.1111/rssb.12220},
   Key = {fds341600}
}

@article{fds326219,
   Author = {Dunson, DB},
   Title = {Toward automated prior choice},
   Journal = {Statistical Science},
   Volume = {32},
   Number = {1},
   Pages = {41-43},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2017},
   Month = {February},
   url = {http://dx.doi.org/10.1214/16-STS607},
   Doi = {10.1214/16-STS607},
   Key = {fds326219}
}

@article{fds333226,
   Author = {Abrego, N and Dunson, D and Halme, P and Salcedo, I and Ovaskainen,
             O},
   Title = {Wood-inhabiting fungi with tight associations with other
             species have declined as a response to forest
             management},
   Journal = {Oikos},
   Volume = {126},
   Number = {2},
   Publisher = {WILEY},
   Year = {2017},
   Month = {February},
   url = {http://dx.doi.org/10.1111/oik.03674},
   Abstract = {© 2016 The Authors. Research on mutualistic and antagonistic
             networks, such as plant–pollinator and host–parasite
             networks, has shown that species interactions can influence
             and be influenced by the responses of species to
             environmental perturbations. Here we examine whether results
             obtained for directly observable networks generalize to more
             complex networks in which species interactions cannot be
             observed directly. As a case study, we consider data on the
             occurrences of 98 wood-inhabiting fungal species in managed
             and natural forests. We specifically ask if and how much the
             positions of wood-inhabiting fungal species within the
             interaction networks influence their responses to forest
             management. For this, we utilize a joint species
             distribution model that partitions variation in species
             occurrences among environmental (i.e. resource availability)
             and biotic (i.e. species-to-species associations)
             predictors. Our results indicate that in addition to the
             direct loss of resource-specialised species, forest
             management has indirect effects mediated through interactive
             associations. In particular, species with strong associative
             links to other species are especially sensitive to forest
             management.},
   Doi = {10.1111/oik.03674},
   Key = {fds333226}
}

@article{fds329111,
   Author = {Durante, D and Paganin, S and Scarpa, B and Dunson,
             DB},
   Title = {Bayesian modelling of networks in complex business
             intelligence problems},
   Journal = {Journal of the Royal Statistical Society. Series C, Applied
             Statistics},
   Volume = {66},
   Number = {3},
   Pages = {555-580},
   Publisher = {WILEY},
   Year = {2017},
   Month = {April},
   url = {http://dx.doi.org/10.1111/rssc.12168},
   Abstract = {© 2016 Royal Statistical Society. Complex network data
             problems are increasingly common in many fields of
             application. Our motivation is drawn from strategic
             marketing studies monitoring customer choices of specific
             products, along with co-subscription networks encoding
             multiple-purchasing behaviour. Data are available for
             several agencies within the same insurance company, and our
             goal is to exploit co-subscription networks efficiently to
             inform targeted advertising of cross-sell strategies to
             currently monoproduct customers. We address this goal by
             developing a Bayesian hierarchical model, which clusters
             agencies according to common monoproduct customer choices
             and co-subscription networks. Within each cluster, we
             efficiently model customer behaviour via a cluster-dependent
             mixture of latent eigenmodels. This formulation provides key
             information on monoproduct customer choices and
             multiple-purchasing behaviour within each cluster, informing
             targeted cross-sell strategies. We develop simple algorithms
             for tractable inference and assess performance in
             simulations and an application to business
             intelligence.},
   Doi = {10.1111/rssc.12168},
   Key = {fds329111}
}

@article{fds326037,
   Author = {McKinney, M and Moffitt, AB and Gaulard, P and Travert, M and De Leval,
             L and Nicolae, A and Raffeld, M and Jaffe, ES and Pittaluga, S and Xi, L and Heavican, T and Iqbal, J and Belhadj, K and Delfau-Larue, MH and Fataccioli, V and Czader, MB and Lossos, IS and Chapman-Fredricks,
             JR and Richards, KL and Fedoriw, Y and Ondrejka, SL and Hsi, ED and Low, L and Weisenburger, D and Chan, WC and Mehta-Shah, N and Horwitz, S and Bernal-Mizrachi, L and Flowers, CR and Beaven, AW and Parihar, M and Baseggio, L and Parrens, M and Moreau, A and Sujobert, P and Pilichowska, M and Evens, AM and Chadburn, A and Au-Yeung, RKH and Srivastava, G and Choi, WWL and Goodlad, JR and Aurer, I and Basic-Kinda, S and Gascoyne, RD and Davis, NS and Li, G and Zhang, J and Rajagopalan, D and Reddy, A and Love, C and Levy, S and Zhuang, Y and Datta, J and Dunson, DB and Davé, SS},
   Title = {The Genetic Basis of Hepatosplenic T-cell
             Lymphoma.},
   Journal = {Cancer Discov},
   Volume = {7},
   Number = {4},
   Pages = {369-379},
   Year = {2017},
   Month = {April},
   url = {http://dx.doi.org/10.1158/2159-8290.CD-16-0330},
   Abstract = {Hepatosplenic T-cell lymphoma (HSTL) is a rare and lethal
             lymphoma; the genetic drivers of this disease are unknown.
             Through whole-exome sequencing of 68 HSTLs, we define
             recurrently mutated driver genes and copy-number alterations
             in the disease. Chromatin-modifying genes, including SETD2,
             INO80, and ARID1B, were commonly mutated in HSTL, affecting
             62% of cases. HSTLs manifest frequent mutations in STAT5B
             (31%), STAT3 (9%), and PIK3CD (9%), for which there
             currently exist potential targeted therapies. In addition,
             we noted less frequent events in EZH2, KRAS, and TP53.
             SETD2 was the most frequently silenced gene in HSTL. We
             experimentally demonstrated that SETD2 acts as a tumor
             suppressor gene. In addition, we found that mutations in
             STAT5B and PIK3CD activate critical signaling pathways
             important to cell survival in HSTL. Our work thus defines
             the genetic landscape of HSTL and implicates gene mutations
             linked to HSTL pathogenesis and potential treatment
             targets. Significance: We report the first systematic
             application of whole-exome sequencing to define the genetic
             basis of HSTL, a rare but lethal disease. Our work defines
             SETD2 as a tumor suppressor gene in HSTL and implicates
             genes including INO80 and PIK3CD in the disease. Cancer
             Discov; 7(4); 369-79. ©2017 AACR. See related commentary
             by Yoshida and Weinstock, p. 352. This article is
             highlighted in the In This Issue feature, p. 339.},
   Doi = {10.1158/2159-8290.CD-16-0330},
   Key = {fds326037}
}

@article{fds329992,
   Author = {Tikhonov, G and Abrego, N and Dunson, D and Ovaskainen,
             O},
   Title = {Using joint species distribution models for evaluating how
             species-to-species associations depend on the environmental
             context},
   Journal = {Methods in Ecology and Evolution},
   Volume = {8},
   Number = {4},
   Pages = {443-452},
   Publisher = {WILEY},
   Editor = {Warton, D},
   Year = {2017},
   Month = {April},
   url = {http://dx.doi.org/10.1111/2041-210X.12723},
   Abstract = {© 2017 The Authors. Methods in Ecology and Evolution ©
             2017 British Ecological Society. Joint species distribution
             models (JSDMs) are increasingly used to analyse community
             ecology data. Recent progress with JSDMs has provided
             ecologists with new tools for estimating species
             associations (residual co-occurrence patterns after
             accounting for environmental niches) from large data sets,
             as well as for increasing the predictive power of species
             distribution models (SDMs) by accounting for such
             associations. Yet, one critical limitation of JSDMs
             developed thus far is that they assume constant species
             associations. However, in real ecological communities, the
             direction and strength of interspecific interactions are
             likely to be different under different environmental
             conditions. In this paper, we overcome this shortcoming of
             present JSDMs by allowing species associations to covary
             with measured environmental covariates. To estimate
             environment-dependent species associations, we utilize a
             latent variable structure in which the factor loadings are
             modelled as a linear regression on environmental
             covariates. We illustrate the performance of the
             statistical framework with both simulated and real data.
             Our results show that JSDMs perform substantially better in
             inferring environment-dependent species associations than
             single SDMs, especially with sparse data. Furthermore,
             JSDMs consistently outperform SDMs in predictive power,
             generating predictions that account for
             environment-dependent biotic associations. We implemented
             the statistical framework as a MATLAB package, which
             includes tools both for model parameterization as well as
             for post-processing of results, particularly for addressing
             whether and how species associations depend on the
             environmental conditions. Our statistical framework provides
             a new tool for ecologists who wish to investigate from
             non-manipulative observational community data the dependency
             of interspecific interactions on environmental context. Our
             method can be applied to answer the fundamental questions in
             community ecology about how species’ interactions shift in
             changing environmental conditions, as well as to predict
             future changes of species’ interactions in response to
             global change.},
   Doi = {10.1111/2041-210X.12723},
   Key = {fds329992}
}
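
A minimal sketch of the covariate-dependent loadings construction
described in the entry above. This is our illustration in Python, not
the authors' MATLAB package; all dimensions and values are invented.
Loadings linear in an environmental covariate x give a residual
species-to-species covariance Sigma(x) = Lambda(x) Lambda(x)' that
changes, and can even flip sign, along the environmental gradient.

import numpy as np

rng = np.random.default_rng(5)
n_species, n_factors = 6, 2
Lambda0 = rng.normal(size=(n_species, n_factors))  # baseline loadings
Lambda1 = rng.normal(size=(n_species, n_factors))  # covariate effect

def association(x):
    """Residual covariance among species at environmental covariate x."""
    L = Lambda0 + x * Lambda1
    return L @ L.T

# Association between species 0 and 1 at the two ends of the gradient;
# a sign change indicates a context-dependent association.
print(association(-1.0)[0, 1], association(1.0)[0, 1])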

@article{fds325977,
   Author = {Lin, L and Rao, V and Dunson, D},
   Title = {Bayesian nonparametric inference on the stiefel
             manifold},
   Journal = {Statistica Sinica},
   Volume = {27},
   Number = {2},
   Pages = {535-553},
   Publisher = {Institute of Statistical Science},
   Year = {2017},
   Month = {April},
   url = {http://dx.doi.org/10.5705/ss.202016.0017},
   Abstract = {The Stiefel manifold V_{p,d} is the space of all d × p
             orthonormal matrices, with the d-1 hypersphere and the space
             of all orthogonal matrices constituting special cases. In
             modeling data lying on the Stiefel manifold, parametric
             distributions such as the matrix Langevin distribution are
             often used; however, model misspecification is a concern and
             it is desirable to have nonparametric alternatives. Current
             nonparametric methods are mainly Fréchet-mean based. We
             take a fully generative nonparametric approach, which relies
             on mixing parametric kernels such as the matrix Langevin.
             The proposed kernel mixtures can approximate a large class
             of distributions on the Stiefel manifold, and we develop
             theory showing posterior consistency. While there exists
             work developing general posterior consistency results,
             extending these results to this particular manifold requires
             substantial new theory. Posterior inference is illustrated
             on a dataset of near-Earth objects.},
   Doi = {10.5705/ss.202016.0017},
   Key = {fds325977}
}

@article{fds329991,
   Author = {Ovaskainen, O and Tikhonov, G and Norberg, A and Guillaume Blanchet,
             F and Duan, L and Dunson, D and Roslin, T and Abrego,
             N},
   Title = {How to make more out of community data? A conceptual
             framework and its implementation as models and
             software.},
   Journal = {Ecology Letters},
   Volume = {20},
   Number = {5},
   Pages = {561-576},
   Year = {2017},
   Month = {May},
   url = {http://dx.doi.org/10.1111/ele.12757},
   Abstract = {Community ecology aims to understand what factors determine
             the assembly and dynamics of species assemblages at
             different spatiotemporal scales. To facilitate the
             integration between conceptual and statistical approaches in
             community ecology, we propose Hierarchical Modelling of
             Species Communities (HMSC) as a general, flexible framework
             for modern analysis of community data. While
             non-manipulative data allow for only correlative and not
             causal inference, this framework facilitates the formulation
             of data-driven hypotheses regarding the processes that
             structure communities. We model environmental filtering by
             variation and covariation in the responses of individual
             species to the characteristics of their environment, with
             potential contingencies on species traits and phylogenetic
             relationships. We capture biotic assembly rules by
             species-to-species association matrices, which may be
             estimated at multiple spatial or temporal scales. We
             operationalise the HMSC framework as a hierarchical Bayesian
             joint species distribution model, and implement it as R- and
             Matlab-packages which enable computationally efficient
             analyses of large data sets. Armed with this tool, community
             ecologists can make sense of many types of data, including
             spatially explicit data and time-series data. We illustrate
             the use of this framework through a series of diverse
             ecological examples.},
   Doi = {10.1111/ele.12757},
   Key = {fds329991}
}

@article{fds329990,
   Author = {Ovaskainen, O and Tikhonov, G and Dunson, D and Grøtan, V and Engen, S and Sæther, B-E and Abrego, N},
   Title = {How are species interactions structured in species-rich
             communities? A new method for analysing time-series
             data.},
   Journal = {Proceedings of the Royal Society B: Biological
             Sciences},
   Volume = {284},
   Number = {1855},
   Pages = {20170768-20170768},
   Year = {2017},
   Month = {May},
   url = {http://dx.doi.org/10.1098/rspb.2017.0768},
   Abstract = {Estimation of intra- and interspecific interactions from
             time-series on species-rich communities is challenging due
             to the high number of potentially interacting species pairs.
             The previously proposed sparse interactions model overcomes
             this challenge by assuming that most species pairs do not
             interact. We propose an alternative model that does not
             assume that any of the interactions are necessarily zero,
             but summarizes the influences of individual species by a
             small number of community-level drivers. The community-level
             drivers are defined as linear combinations of species
             abundances, and they may thus represent e.g. the total
             abundance of all species or the relative proportions of
             different functional groups. We show with simulated and real
             data how our approach can be used to compare different
             hypotheses on community structure. In an empirical example
             using aquatic microorganisms, the community-level drivers
             model clearly outperformed the sparse interactions model in
             predicting independent validation data.},
   Doi = {10.1098/rspb.2017.0768},
   Key = {fds329990}
}
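
A crude sketch of the reduced-rank idea behind the community-level
drivers model in the entry above: instead of estimating a full S x S
interaction matrix, constrain it to B = U V', so each species responds
(through U) to a small number q of drivers d_t = V' x_t that are
linear combinations of species abundances. The Gompertz-style dynamics
and all values below are our illustration, not the paper's fitted
model.

import numpy as np

rng = np.random.default_rng(6)
S, q, T = 30, 2, 50                   # species, drivers, time steps
U = rng.normal(0, 0.05, size=(S, q))  # species responses to drivers
V = rng.normal(0, 0.05, size=(S, q))  # driver definitions
r = rng.normal(0, 0.1, size=S)        # intrinsic growth rates

x = np.zeros((T, S))                  # log abundances
for t in range(T - 1):
    d = V.T @ x[t]                    # community-level drivers
    x[t + 1] = x[t] + r + U @ d + rng.normal(0, 0.1, size=S)

print("full-matrix parameters:", S * S, "vs drivers model:", 2 * S * q)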

@article{fds327282,
   Author = {Moffitt, AB and Ondrejka, SL and McKinney, M and Rempel, RE and Goodlad,
             JR and Teh, CH and Leppa, S and Mannisto, S and Kovanen, PE and Tse, E and Au-Yeung, RKH and Kwong, Y-L and Srivastava, G and Iqbal, J and Yu, J and Naresh, K and Villa, D and Gascoyne, RD and Said, J and Czader, MB and Chadburn, A and Richards, KL and Rajagopalan, D and Davis, NS and Smith,
             EC and Palus, BC and Tzeng, TJ and Healy, JA and Lugar, PL and Datta, J and Love, C and Levy, S and Dunson, DB and Zhuang, Y and Hsi, ED and Dave,
             SS},
   Title = {Enteropathy-associated T cell lymphoma subtypes are
             characterized by loss of function of SETD2.},
   Journal = {J Exp Med},
   Volume = {214},
   Number = {5},
   Pages = {1371-1386},
   Year = {2017},
   Month = {May},
   url = {http://dx.doi.org/10.1084/jem.20160894},
   Abstract = {Enteropathy-associated T cell lymphoma (EATL) is a lethal,
             and the most common, neoplastic complication of celiac
             disease. Here, we defined the genetic landscape of EATL
             through whole-exome sequencing of 69 EATL tumors. SETD2 was
             the most frequently silenced gene in EATL (32% of cases).
             The JAK-STAT pathway was the most frequently mutated
             pathway, with frequent mutations in STAT5B as well as JAK1,
             JAK3, STAT3, and SOCS1. We also identified mutations in
             KRAS, TP53, and TERT. Type I EATL and type II EATL (monomorphic
             epitheliotropic intestinal T cell lymphoma) had highly
             overlapping genetic alterations indicating shared mechanisms
             underlying their pathogenesis. We modeled the effects of
             SETD2 loss in vivo by developing a T cell-specific knockout
             mouse. These mice manifested an expansion of γδ T cells,
             indicating novel roles for SETD2 in T cell development and
             lymphomagenesis. Our data render the most comprehensive
             genetic portrait yet of this uncommon but lethal disease and
             may inform future classification schemes.},
   Doi = {10.1084/jem.20160894},
   Key = {fds327282}
}

@article{fds343492,
   Author = {Rao, V and Adams, RP and Dunson, DB},
   Title = {Bayesian inference for Matérn repulsive
             processes},
   Journal = {Journal of the Royal Statistical Society: Series B
             (Statistical Methodology)},
   Volume = {79},
   Number = {3},
   Pages = {877-897},
   Year = {2017},
   Month = {June},
   url = {http://dx.doi.org/10.1111/rssb.12198},
   Abstract = {© 2016 Royal Statistical Society. In many applications
             involving point pattern data, the Poisson process assumption
             is unrealistic, with the data exhibiting a more regular
             spread. Such repulsion between events is exhibited by trees
             for example, because of competition for light and nutrients.
             Other examples include the locations of biological cells and
             cities, and the times of neuronal spikes. Given the many
             applications of repulsive point processes, there is a
             surprisingly limited literature developing flexible,
             realistic and interpretable models, as well as efficient
             inferential methods. We address this gap by developing a
             modelling framework around the Matérn type III repulsive
             process. We consider some extensions of the original Matérn
             type III process for both the homogeneous and the
             inhomogeneous cases. We also derive the probability density
             of this generalized Matérn process, allowing us to
             characterize the conditional distribution of the various
             latent variables, and leading to a novel and efficient
             Markov chain Monte Carlo algorithm. We apply our ideas to
             data sets of spatial locations of trees, nerve fibre cells
             and Greyhound bus stations.},
   Doi = {10.1111/rssb.12198},
   Key = {fds343492}
}

@article{fds326919,
   Author = {Schaich Borg and J and Srivastava, S and Lin, L and Heffner, J and Dunson,
             D and Dzirasa, K and de Lecea, L},
   Title = {Rat intersubjective decisions are encoded by
             frequency-specific oscillatory contexts.},
   Journal = {Brain and Behavior},
   Volume = {7},
   Number = {6},
   Pages = {e00710},
   Year = {2017},
   Month = {June},
   url = {http://dx.doi.org/10.1002/brb3.710},
   Abstract = {INTRODUCTION: It is unknown how the brain coordinates
             decisions to withstand personal costs in order to prevent
             other individuals' distress. Here we test whether local
             field potential (LFP) oscillations between brain regions
             create "neural contexts" that select specific brain
             functions and encode the outcomes of these types of
             intersubjective decisions. METHODS: Rats participated in an
             "Intersubjective Avoidance Test" (IAT) that tested rats'
             willingness to enter an innately aversive chamber to prevent
             another rat from getting shocked. c-Fos immunoreactivity was
             used to screen for brain regions involved in IAT
             performance. Multi-site local field potential (LFP)
             recordings were collected simultaneously and bilaterally
             from five brain regions implicated in the c-Fos studies
             while rats made decisions in the IAT. Local field potential
             recordings were analyzed using an elastic net penalized
             regression framework. RESULTS: Rats voluntarily entered an
             innately aversive chamber to prevent another rat from
             getting shocked, and c-Fos immunoreactivity in brain regions
             known to be involved in human empathy-including the anterior
             cingulate, insula, orbital frontal cortex, and
             amygdala-correlated with the magnitude of "intersubjective
             avoidance" each rat displayed. Local field potential
             recordings revealed that optimal accounts of rats'
             performance in the task require specific frequencies of LFP
             oscillations between brain regions in addition to specific
             frequencies of LFP oscillations within brain regions. Alpha
             and low gamma coherence between spatially distributed brain
             regions predicts more intersubjective avoidance, while theta
             and high gamma coherence between a separate subset of brain
             regions predicts less intersubjective avoidance. Phase
             relationship analyses indicated that choice-relevant
             coherence in the alpha range reflects information passed
             from the amygdala to cortical structures, while coherence in
             the theta range reflects information passed in the reverse
             direction. CONCLUSION: These results indicate that the
             frequency-specific "neural context" surrounding brain
             regions involved in social cognition encodes outcomes of
             decisions that affect others, above and beyond signals from
             any set of brain regions in isolation.},
   Doi = {10.1002/brb3.710},
   Key = {fds326919}
}

@article{fds327028,
   Author = {Zhu, B and Dunson, DB},
   Title = {Bayesian functional data modeling for heterogeneous
             volatility},
   Journal = {Bayesian Analysis},
   Volume = {12},
   Number = {2},
   Pages = {335-350},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2017},
   Month = {June},
   url = {http://dx.doi.org/10.1214/16-BA1004},
   Abstract = {© 2017 International Society for Bayesian Analysis.
             Although there are many methods for functional data
             analysis, less emphasis is put on characterizing variability
             among volatilities of individual functions. In particular,
             certain individuals exhibit erratic swings in their
             trajectory while other individuals have more stable
             trajectories. There is evidence of such volatility
             heterogeneity in blood pressure trajectories during
             pregnancy, for example, and reason to suspect that
             volatility is a biologically important feature. Most
             functional data analysis models implicitly assume similar or
             identical smoothness of the individual functions, and hence
             can lead to misleading inferences on volatility and an
             inadequate representation of the functions. We propose a
             novel class of functional data analysis models characterized
             using hierarchical stochastic differential equations. We
             model the derivatives of a mean function and deviation
             functions using Gaussian processes, while also allowing
             covariate dependence including on the volatilities of the
             deviation functions. Following a Bayesian approach to
             inference, a Markov chain Monte Carlo algorithm is used for
             posterior computation. The methods are tested on simulated
             data and applied to blood pressure trajectories during
             pregnancy.},
   Doi = {10.1214/16-BA1004},
   Key = {fds327028}
}

@article{fds327029,
   Author = {Wang, L and Durante, D and Jung, RE and Dunson, DB},
   Title = {Bayesian network-response regression.},
   Journal = {Bioinformatics (Oxford, England)},
   Volume = {33},
   Number = {12},
   Pages = {1859-1866},
   Year = {2017},
   Month = {June},
   url = {http://dx.doi.org/10.1093/bioinformatics/btx050},
   Abstract = {There is increasing interest in learning how human brain
             networks vary as a function of a continuous trait, but
             flexible and efficient procedures to accomplish this goal
             are limited. We develop a Bayesian semiparametric model,
             which combines low-rank factorizations and flexible Gaussian
             process priors to learn changes in the conditional
             expectation of a network-valued random variable across the
             values of a continuous predictor, while including
             subject-specific random effects. The formulation leads to a
             general framework for inference on changes in brain network
             structures across human traits, facilitating borrowing of
             information and coherently characterizing uncertainty. We
             provide an efficient Gibbs sampler for posterior computation
             along with simple procedures for inference, prediction and
             goodness-of-fit assessments. The model is applied to learn
             how human brain networks vary across individuals with
             different intelligence scores. Results provide interesting
             insights on the association between intelligence and brain
             connectivity, while demonstrating good predictive
             performance. Source code implemented in R and data are
             available at https://github.com/wangronglu/BNRR. Contact:
             rl.wang@duke.edu. Supplementary data are available at
             Bioinformatics online.},
   Doi = {10.1093/bioinformatics/btx050},
   Key = {fds327029}
}

@article{fds329353,
   Author = {Guhaniyogi, R and Qamar, S and Dunson, DB},
   Title = {Bayesian tensor regression},
   Journal = {Journal of Machine Learning Research},
   Volume = {18},
   Pages = {1-31},
   Year = {2017},
   Month = {August},
   Abstract = {©2017 Rajarshi Guhaniyogi and Shaan Qamar and David B.
             Dunson. We propose a Bayesian approach to regression with a
             scalar response on vector and tensor covariates.
             Vectorization of the tensor prior to analysis fails to
             exploit the structure, often leading to poor estimation and
             predictive performance. We introduce a novel class of
             multiway shrinkage priors for tensor coefficients in the
             regression setting and present posterior consistency results
             under mild conditions. A computationally efficient Markov
             chain Monte Carlo algorithm is developed for posterior
             computation. Simulation studies illustrate substantial gains
             over existing tensor regression methods in terms of
             estimation and parameter inference. Our approach is further
             illustrated in a neuroimaging application.},
   Key = {fds329353}
}
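
The low-rank coefficient structure that the multiway shrinkage priors
in the entry above act on can be sketched directly. The Python
fragment below is ours and purely illustrative: it builds a rank-R CP
(PARAFAC) coefficient tensor from its margins and evaluates the linear
predictor <B, X>, making explicit the parameter savings over naive
vectorization.

import numpy as np

rng = np.random.default_rng(3)
p1, p2, R = 20, 20, 3                 # tensor covariate is p1 x p2

# Rank-R CP parameterization: B = sum_r outer(beta1[:, r], beta2[:, r]).
beta1 = rng.normal(size=(p1, R))
beta2 = rng.normal(size=(p2, R))
B = np.einsum("ir,jr->ij", beta1, beta2)

X = rng.normal(size=(p1, p2))         # one tensor-valued covariate
eta = np.sum(B * X)                   # linear predictor <B, X>

# R*(p1+p2) parameters instead of p1*p2 after vectorization; the
# multiway priors shrink the margins beta1, beta2.
print(f"CP: {R * (p1 + p2)} params, vectorized: {p1 * p2}, eta = {eta:.3f}")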

@article{fds329109,
   Author = {Li, C and Srivastava, S and Dunson, DB},
   Title = {Simple, scalable and accurate posterior interval
             estimation},
   Journal = {Biometrika},
   Volume = {104},
   Number = {3},
   Pages = {665-680},
   Publisher = {Oxford University Press (OUP)},
   Year = {2017},
   Month = {September},
   url = {http://dx.doi.org/10.1093/biomet/asx033},
   Abstract = {© 2017 Biometrika Trust. Standard posterior sampling
             algorithms, such as Markov chain Monte Carlo procedures,
             face major challenges in scaling up to massive datasets. We
             propose a simple and general posterior interval estimation
             algorithm to rapidly and accurately estimate quantiles of
             the posterior distributions for one-dimensional functionals.
             Our algorithm runs Markov chain Monte Carlo in parallel for
             subsets of the data, and then averages quantiles estimated
             from each subset. We provide strong theoretical guarantees
             and show that the credible intervals from our algorithm
             asymptotically approximate those from the full posterior in
             the leading parametric order. Our algorithm has a better
             balance of accuracy and efficiency than its competitors
             across a variety of simulations and a real-data
             example.},
   Doi = {10.1093/biomet/asx033},
   Key = {fds329109}
}
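
The combination step of the algorithm in the entry above is easy to
illustrate. Below is a minimal Python sketch, ours rather than the
authors' code: each subset posterior uses the likelihood raised to the
power k so that its scale matches the full posterior, quantiles are
computed per subset, and then averaged. For simplicity a conjugate
normal model stands in for actual MCMC.

import numpy as np

rng = np.random.default_rng(0)
n, k = 10_000, 10             # total sample size, number of subsets
theta_true, sigma = 2.0, 1.0  # data: y_i ~ N(theta, sigma^2)
y = rng.normal(theta_true, sigma, size=n)

def subset_posterior_draws(y_sub, n_draws=5_000):
    """Draws from the subset posterior for theta under a flat prior,
    with the likelihood raised to the power k so the subset posterior
    has the same scale as the full posterior."""
    m = len(y_sub)
    # Powered likelihood: theta | y_sub ~ N(mean(y_sub), sigma^2/(k*m)).
    return rng.normal(y_sub.mean(), sigma / np.sqrt(k * m), size=n_draws)

probs = [0.025, 0.5, 0.975]
subset_quantiles = np.array([
    np.quantile(subset_posterior_draws(chunk), probs)
    for chunk in np.array_split(y, k)
])
combined = subset_quantiles.mean(axis=0)  # average quantiles across subsets
print(dict(zip(["2.5%", "50%", "97.5%"], combined.round(3))))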

@article{fds323700,
   Author = {Lock, EF and Dunson, DB},
   Title = {Bayesian genome- and epigenome-wide association studies with
             gene level dependence.},
   Journal = {Biometrics},
   Volume = {73},
   Number = {3},
   Pages = {1018-1028},
   Year = {2017},
   Month = {September},
   url = {http://dx.doi.org/10.1111/biom.12649},
   Abstract = {High-throughput genetic and epigenetic data are often
             screened for associations with an observed phenotype. For
             example, one may wish to test hundreds of thousands of
             genetic variants, or DNA methylation sites, for an
             association with disease status. These genomic variables can
             naturally be grouped by the gene they encode, among other
             criteria. However, standard practice in such applications is
             independent screening with a universal correction for
             multiplicity. We propose a Bayesian approach in which the
             prior probability of an association for a given genomic
             variable depends on its gene, and the gene-specific
             probabilities are modeled nonparametrically. This
             hierarchical model allows for appropriate gene and
             genome-wide multiplicity adjustments, and can be
             incorporated into a variety of Bayesian association
             screening methodologies with negligible increase in
             computational complexity. We describe an application to
             screening for differences in DNA methylation between lower
             grade glioma and glioblastoma multiforme tumor samples from
             The Cancer Genome Atlas. Software is available via the
             package BayesianScreening for R: github.com/lockEF/BayesianScreening.},
   Doi = {10.1111/biom.12649},
   Key = {fds323700}
}

@article{fds329110,
   Author = {Srivastava, S and Engelhardt, BE and Dunson, DB},
   Title = {Expandable factor analysis.},
   Journal = {Biometrika},
   Volume = {104},
   Number = {3},
   Pages = {649-663},
   Year = {2017},
   Month = {September},
   url = {http://dx.doi.org/10.1093/biomet/asx030},
   Abstract = {Bayesian sparse factor models have proven useful for
             characterizing dependence in multivariate data, but scaling
             computation to large numbers of samples and dimensions is
             problematic. We propose expandable factor analysis for
             scalable inference in factor models when the number of
             factors is unknown. The method relies on a continuous
             shrinkage prior for efficient maximum a posteriori
             estimation of a low-rank and sparse loadings matrix. The
             structure of the prior leads to an estimation algorithm that
             accommodates uncertainty in the number of factors. We
             propose an information criterion to select the
             hyperparameters of the prior. Expandable factor analysis has
             better false discovery rates and true positive rates than
             its competitors across diverse simulation settings. We apply
             the proposed approach to a gene expression study of ageing
             in mice, demonstrating superior results relative to four
             competing methods.},
   Doi = {10.1093/biomet/asx030},
   Key = {fds329110}
}

@article{fds332379,
   Author = {Durante, D and Dunson, DB and Vogelstein, JT},
   Title = {Rejoinder: Nonparametric Bayes Modeling of Populations of
             Networks},
   Journal = {Journal of the American Statistical Association},
   Volume = {112},
   Number = {520},
   Pages = {1547-1552},
   Publisher = {Informa UK Limited},
   Year = {2017},
   Month = {October},
   url = {http://dx.doi.org/10.1080/01621459.2017.1395643},
   Doi = {10.1080/01621459.2017.1395643},
   Key = {fds332379}
}

@article{fds327388,
   Author = {Durante, D and Dunson, DB and Vogelstein, JT},
   Title = {Nonparametric Bayes Modeling of Populations of
             Networks},
   Journal = {Journal of the American Statistical Association},
   Volume = {112},
   Number = {520},
   Pages = {1516-1530},
   Publisher = {Informa UK Limited},
   Year = {2017},
   Month = {October},
   url = {http://dx.doi.org/10.1080/01621459.2016.1219260},
   Abstract = {© 2017 American Statistical Association. Replicated network
             data are increasingly available in many research fields. For
             example, in connectomic applications, interconnections among
             brain regions are collected for each patient under study,
             motivating statistical models which can flexibly
             characterize the probabilistic generative mechanism
             underlying these network-valued data. Available models for a
             single network are not designed specifically for inference
             on the entire probability mass function of a network-valued
             random variable and therefore lack flexibility in
             characterizing the distribution of relevant topological
             structures. We propose a flexible Bayesian nonparametric
             approach for modeling the population distribution of
             network-valued data. The joint distribution of the edges is
             defined via a mixture model that reduces dimensionality and
             efficiently incorporates network information within each
             mixture component by leveraging latent space
             representations. The formulation leads to an efficient Gibbs
             sampler and provides simple and coherent strategies for
             inference and goodness-of-fit assessments. We provide
             theoretical results on the flexibility of our model and
             illustrate improved performance—compared to
             state-of-the-art models—in simulations and application to
             human brain networks. Supplementary materials for this
             article are available online.},
   Doi = {10.1080/01621459.2016.1219260},
   Key = {fds327388}
}
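
A minimal generative sketch of the mixture-of-latent-space
construction described in the entry above, in Python. This is our
illustration; sizes, priors and parameter values are invented. A
network is drawn by picking a mixture component, forming edge
probabilities from that component's latent coordinates, and drawing
Bernoulli edges.

import numpy as np

rng = np.random.default_rng(7)
V_nodes, H, R = 15, 3, 2              # nodes, components, latent dimension
w = rng.dirichlet(np.ones(H))         # mixture weights

def sigmoid(a):
    return 1 / (1 + np.exp(-a))

# Component-specific edge probabilities via latent coordinates.
Z = rng.normal(0, 1, size=(H, V_nodes, R))
mu = rng.normal(-1, 0.5, size=H)
P = np.array([sigmoid(mu[h] + Z[h] @ Z[h].T) for h in range(H)])

def sample_network():
    """Draw one undirected network from the mixture."""
    h = rng.choice(H, p=w)
    upper = np.triu(rng.random((V_nodes, V_nodes)) < P[h], k=1)
    return (upper | upper.T).astype(int)

print(sample_network().sum() // 2, "edges")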

@article{fds329352,
   Author = {Reddy, A and Zhang, J and Davis, NS and Moffitt, AB and Love, CL and Waldrop, A and Leppa, S and Pasanen, A and Meriranta, L and Karjalainen-Lindsberg, M-L and Nørgaard, P and Pedersen, M and Gang,
             AO and Høgdall, E and Heavican, TB and Lone, W and Iqbal, J and Qin, Q and Li, G and Kim, SY and Healy, J and Richards, KL and Fedoriw, Y and Bernal-Mizrachi, L and Koff, JL and Staton, AD and Flowers, CR and Paltiel, O and Goldschmidt, N and Calaminici, M and Clear, A and Gribben, J and Nguyen, E and Czader, MB and Ondrejka, SL and Collie, A and Hsi, ED and Tse, E and Au-Yeung, RKH and Kwong, Y-L and Srivastava, G and Choi, WWL and Evens, AM and Pilichowska, M and Sengar, M and Reddy, N and Li, S and Chadburn, A and Gordon, LI and Jaffe, ES and Levy, S and Rempel,
             R and Tzeng, T and Happ, LE and Dave, T and Rajagopalan, D and Datta, J and Dunson, DB and Dave, SS},
   Title = {Genetic and Functional Drivers of Diffuse Large B Cell
             Lymphoma.},
   Journal = {Cell},
   Volume = {171},
   Number = {2},
   Pages = {481-494.e15},
   Year = {2017},
   Month = {October},
   url = {http://dx.doi.org/10.1016/j.cell.2017.09.027},
   Abstract = {Diffuse large B cell lymphoma (DLBCL) is the most common
             form of blood cancer and is characterized by a striking
             degree of genetic and clinical heterogeneity. This
             heterogeneity poses a major barrier to understanding the
             genetic basis of the disease and its response to therapy.
             Here, we performed an integrative analysis of whole-exome
             sequencing and transcriptome sequencing in a cohort of 1,001
             DLBCL patients to comprehensively define the landscape of
             150 genetic drivers of the disease. We characterized the
             functional impact of these genes using an unbiased CRISPR
             screen of DLBCL cell lines to define oncogenes that promote
             cell growth. A prognostic model comprising these genetic
             alterations outperformed current established methods: cell
             of origin, the International Prognostic Index comprising
             clinical variables, and dual MYC and BCL2 expression. These
             results comprehensively define the genetic drivers and their
             functional roles in DLBCL to identify new therapeutic
             opportunities in the disease.},
   Doi = {10.1016/j.cell.2017.09.027},
   Key = {fds329352}
}

@article{fds332886,
   Author = {Shang, Y and Dunson, D and Song, JS},
   Title = {Exploiting big data in logistics risk assessment via
             Bayesian nonparametrics},
   Journal = {Operations Research},
   Volume = {65},
   Number = {6},
   Pages = {1574-1588},
   Publisher = {Institute for Operations Research and the Management
             Sciences (INFORMS)},
   Year = {2017},
   Month = {November},
   url = {http://dx.doi.org/10.1287/opre.2017.1612},
   Abstract = {© 2017 INFORMS. In cargo logistics, a key performance
             measure is transport risk, defined as the deviation of the
             actual arrival time from the planned arrival time. Neither
             earliness nor tardiness is desirable for customer and
             freight forwarders. In this paper, we investigate ways to
             assess and forecast transport risks using a half-year of air
             cargo data, provided by a leading forwarder on 1,336 routes
             served by 20 airlines. Interestingly, our preliminary data
             analysis shows a strong multimodal feature in the transport
             risks, driven by unobserved events, such as cargo missing
             flights. To accommodate this feature, we introduce a
             Bayesian nonparametric model-the probit stick-breaking
             process mixture model-for flexible estimation of the
             conditional (i.e., state-dependent) density function of
             transport risk. We demonstrate that using alternative
             methods can lead to misleading inferences. Our model
             provides a tool for the forwarder to offer customized price
             and service quotes. It can also generate baseline airline
             performance to enable fair supplier evaluation. Furthermore,
             the method allows us to separate recurrent risks from
             disruption risks. This is important, because hedging
             strategies for these two kinds of risks are often
             drastically different.},
   Doi = {10.1287/opre.2017.1612},
   Key = {fds332886}
}
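
The probit stick-breaking weights underlying the mixture model in the
entry above are straightforward to construct. The Python below is our
sketch with an arbitrary truncation level; in the paper's model the
stick-breaking scores would additionally depend on the state
(covariates).

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(4)
K = 10                                # truncation level
alpha = rng.normal(0.0, 1.0, size=K)  # probit stick-breaking scores
v = norm.cdf(alpha)                   # stick-break proportions in (0, 1)
v[-1] = 1.0                           # close off the final stick
remaining = np.concatenate([[1.0], np.cumprod(1 - v[:-1])])
w = v * remaining                     # w_k = v_k * prod_{j<k}(1 - v_j)
print(w.round(3), "sum =", w.sum())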

@article{fds332378,
   Author = {Minsker, S and Srivastava, S and Lin, L and Dunson,
             DB},
   Title = {Robust and scalable bayes via a median of subset posterior
             measures},
   Journal = {Journal of Machine Learning Research},
   Volume = {18},
   Pages = {1-40},
   Year = {2017},
   Month = {December},
   Abstract = {© 2017 Stanislav Minsker, Sanvesh Srivastava, Lizhen Lin
             and David B. Dunson. We propose a novel approach to Bayesian
             analysis that is provably robust to outliers in the data and
             often has computational advantages over standard methods.
             Our technique is based on splitting the data into
             non-overlapping subgroups, evaluating the posterior
             distribution given each independent subgroup, and then
             combining the resulting measures. The main novelty of our
             approach is the proposed aggregation step, which is based on
             the evaluation of a median in the space of probability
             measures equipped with a suitable collection of distances
             that can be quickly and efficiently evaluated in practice.
             We present both theoretical and numerical evidence
             illustrating the improvements achieved by our
             method.},
   Key = {fds332378}
}
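
A loose illustration only: the aggregation step in the entry above
takes a geometric median in a space of probability measures, whereas
the Python sketch below substitutes a much cruder analogue, the
geometric median (via Weiszfeld's algorithm) of subset posterior mean
vectors. All data and names are invented.

import numpy as np

def geometric_median(points, n_iter=100, eps=1e-8):
    """Weiszfeld iterations for the geometric median of the rows."""
    z = points.mean(axis=0)
    for _ in range(n_iter):
        d = np.maximum(np.linalg.norm(points - z, axis=1), eps)
        w = 1.0 / d
        z = (w[:, None] * points).sum(axis=0) / w.sum()
    return z

rng = np.random.default_rng(2)
# Pretend posterior means of a 3-dim parameter from 10 data subsets;
# one subset is corrupted by outliers.
subset_means = rng.normal(0.0, 0.1, size=(10, 3))
subset_means[0] += 5.0
print("mean:  ", subset_means.mean(axis=0).round(3))
print("median:", geometric_median(subset_means).round(3))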

@article{fds332363,
   Author = {Wheeler, MW and Dunson, DB and Herring, AH},
   Title = {Bayesian Local Extremum Splines.},
   Journal = {Biometrika},
   Volume = {104},
   Number = {4},
   Pages = {939-952},
   Publisher = {Oxford University Press (OUP)},
   Year = {2017},
   Month = {December},
   Abstract = {We consider shape restricted nonparametric regression on a
             closed set [Formula: see text], where it is reasonable to
             assume the function has no more than H local extrema
             interior to [Formula: see text]. Following a Bayesian
             approach we develop a nonparametric prior over a novel class
             of local extremum splines. This approach is shown to be
             consistent when modeling any continuously differentiable
             function within the class considered, and is used to develop
             methods for testing hypotheses on the shape of the curve.
             Sampling algorithms are developed, and the method is applied
             in simulation studies and data examples where the shape of
             the curve is of interest.},
   Key = {fds332363}
}

@article{fds335796,
   Author = {Bertrán, MA and Martínez, NL and Wang, Y and Dunson, D and Sapiro, G and Ringach, D},
   Title = {Active learning of cortical connectivity from two-photon
             imaging data.},
   Journal = {Plos One},
   Volume = {13},
   Number = {5},
   Pages = {e0196527},
   Year = {2018},
   Month = {January},
   url = {http://dx.doi.org/10.1371/journal.pone.0196527},
   Abstract = {Understanding how groups of neurons interact within a
             network is a fundamental question in system neuroscience.
             Instead of passively observing the ongoing activity of a
             network, we can typically perturb its activity, either by
             external sensory stimulation or directly via techniques such
             as two-photon optogenetics. A natural question is how to use
             such perturbations to identify the connectivity of the
             network efficiently. Here we introduce a method to infer
             sparse connectivity graphs from in-vivo, two-photon imaging
             of population activity in response to external stimuli. A
             novel aspect of the work is the introduction of a
             recommended distribution, incrementally learned from the
             data, to optimally refine the inferred network. Unlike
             existing system identification techniques, this "active
             learning" method automatically focuses its attention on key
             undiscovered areas of the network, instead of targeting
             global uncertainty indicators like parameter variance. We
             show how active learning leads to faster inference while,
             at the same time, providing confidence intervals for the
             network parameters. We present simulations on artificial
             small-world networks to validate the method and apply it to
             real data. Analysis of the frequency of recovered motifs
             shows that cortical networks are consistent with a
             small-world topology model.},
   Doi = {10.1371/journal.pone.0196527},
   Key = {fds335796}
}

@article{fds337687,
   Author = {Miller, JW and Dunson, DB},
   Title = {Robust Bayesian Inference via Coarsening},
   Journal = {Journal of the American Statistical Association},
   Pages = {1-13},
   Publisher = {Informa UK Limited},
   Year = {2018},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2018.1469995},
   Abstract = {© 2018 American Statistical Association. The
             standard approach to Bayesian inference is based on the
             assumption that the distribution of the data belongs to the
             chosen model class. However, even a small violation of this
             assumption can have a large impact on the outcome of a
             Bayesian procedure. We introduce a novel approach to
             Bayesian inference that improves robustness to small
             departures from the model: rather than conditioning on the
             event that the observed data are generated by the model, one
             conditions on the event that the model generates data close
             to the observed data, in a distributional sense. When
             closeness is defined in terms of relative entropy, the
             resulting “coarsened” posterior can be approximated by
             simply tempering the likelihood—that is, by raising the
             likelihood to a fractional power—thus, inference can
             usually be implemented via standard algorithms, and one can
             even obtain analytical solutions when using conjugate
             priors. Some theoretical properties are derived, and we
             illustrate the approach with real and simulated data using
             mixture models and autoregressive models of unknown order.
             Supplementary materials for this article are available
             online.},
   Doi = {10.1080/01621459.2018.1469995},
   Key = {fds337687}
}
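
The tempering approximation in the entry above lends itself to a
compact example. In the Python sketch below, which is ours, the
coarsened posterior is computed in closed form for a conjugate normal
model by giving the data an effective sample size of zeta * n, with
zeta = alpha / (alpha + n); the robustness parameter alpha and all
values are illustrative choices.

import numpy as np

rng = np.random.default_rng(1)
n, alpha = 500, 50
# Contaminated data: mostly N(0, 1), plus a few gross outliers.
y = np.concatenate([rng.normal(0.0, 1.0, n - 10), rng.normal(20.0, 1.0, 10)])

mu0, tau0, sigma = 0.0, 10.0, 1.0  # prior N(mu0, tau0^2); sigma known
zeta = alpha / (alpha + n)         # tempering exponent

# Tempered likelihood acts like a sample of effective size zeta * n,
# widening the posterior to acknowledge possible misspecification.
prec = 1 / tau0**2 + zeta * n / sigma**2
post_mean = (mu0 / tau0**2 + zeta * y.sum() / sigma**2) / prec
post_sd = prec**-0.5
print(f"c-posterior for theta: N({post_mean:.3f}, {post_sd:.3f}^2)")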

@article{fds340936,
   Author = {Johndrow, JE and Smith, A and Pillai, N and Dunson,
             DB},
   Title = {MCMC for Imbalanced Categorical Data},
   Journal = {Journal of the American Statistical Association},
   Year = {2018},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2018.1505626},
   Abstract = {© 2018 American Statistical Association. Many
             modern applications collect highly imbalanced categorical
             data, with some categories relatively rare. Bayesian
             hierarchical models combat data sparsity by borrowing
             information, while also quantifying uncertainty. However,
             posterior computation presents a fundamental barrier to
             routine use; a single class of algorithms does not work well
             in all settings and practitioners waste time trying
             different types of Markov chain Monte Carlo (MCMC)
             approaches. This article was motivated by an application to
             quantitative advertising in which we encountered extremely
             poor computational performance for data augmentation MCMC
             algorithms but obtained excellent performance for adaptive
             Metropolis. To obtain a deeper understanding of this
             behavior, we derive theoretical results on the computational
             complexity of commonly used data augmentation algorithms and
             the Random Walk Metropolis algorithm for highly imbalanced
             binary data. In this regime, our results show computational
             complexity of Metropolis is logarithmic in sample size,
             while data augmentation is polynomial in sample size. The
             root cause of this poor performance of data augmentation is
             a discrepancy between the rates at which the target density
             and MCMC step sizes concentrate. Our methods also show that
             MCMC algorithms that exhibit a similar discrepancy will fail
             in large samples—a result with substantial practical
             impact. Supplementary materials for this article are
             available online.},
   Doi = {10.1080/01621459.2018.1505626},
   Key = {fds340936}
}

@article{fds340385,
   Author = {Durante, D and Dunson, DB},
   Title = {Bayesian inference and testing of group differences in brain
             networks},
   Journal = {Bayesian Analysis},
   Volume = {13},
   Number = {1},
   Pages = {29-58},
   Publisher = {Institute of Mathematical Statistics},
   Year = {2018},
   Month = {January},
   url = {http://dx.doi.org/10.1214/16-BA1030},
   Abstract = {© 2018 International Society for Bayesian Analysis. Network
             data are increasingly collected along with other variables
             of interest. Our motivation is drawn from neurophysiology
             studies measuring brain connectivity networks for a sample
             of individuals along with their membership to a low or high
             creative reasoning group. It is of paramount importance to
             develop statistical methods for testing of global and local
             changes in the structural interconnections among brain
             regions across groups. We develop a general Bayesian
             procedure for inference and testing of group differences in
             the network structure, which relies on a nonparametric
             representation for the conditional probability mass function
             associated with a network-valued random variable. By
             leveraging a mixture of low-rank factorizations, we allow
             simple global and local hypothesis testing adjusting for
             multiplicity. An efficient Gibbs sampler is defined for
             posterior computation. We provide theoretical results on the
             flexibility of the model and assess testing performance in
             simulations. The approach is applied to provide novel
             insights on the relationships between human brain networks
             and creativity.},
   Doi = {10.1214/16-BA1030},
   Key = {fds340385}
}

@article{fds332810,
   Author = {van den Boom, W and Schroeder, RA and Manning, MW and Setji, TL and Fiestan, G-O and Dunson, DB},
   Title = {Effect of A1C and Glucose on Postoperative Mortality in
             Noncardiac and Cardiac Surgeries.},
   Journal = {Diabetes Care},
   Volume = {41},
   Number = {4},
   Pages = {782-788},
   Year = {2018},
   Month = {April},
   url = {http://dx.doi.org/10.2337/dc17-2232},
   Abstract = {OBJECTIVE: Hemoglobin A1c (A1C) is used in assessment of
             patients for elective surgeries because hyperglycemia
             increases risk of adverse events. However, the interplay of
             A1C, glucose, and surgical outcomes remains unclarified,
             with often only two of these three factors considered
             simultaneously. We assessed the association of preoperative
             A1C with perioperative glucose control and their
             relationship with 30-day mortality. RESEARCH DESIGN AND
             METHODS: Retrospective analysis on 431,480 surgeries within
             the Duke University Health System determined the association
             of preoperative A1C with perioperative glucose (averaged
             over the first 3 postoperative days) and 30-day mortality
             among 6,684 noncardiac and 6,393 cardiac surgeries with A1C
             and glucose measurements. A generalized additive model was
             used, enabling nonlinear relationships. RESULTS: A1C and
             glucose were strongly associated. Glucose and mortality were
             positively associated for noncardiac cases: 1.0% mortality
             at mean glucose of 100 mg/dL and 1.6% at mean glucose of 200
             mg/dL. For cardiac procedures, there was a striking U-shaped
             relationship between glucose and mortality, ranging from
             4.5% at 100 mg/dL to a nadir of 1.5% at 140 mg/dL and rising
             again to 6.9% at 200 mg/dL. A1C and 30-day mortality were
             not associated when controlling for glucose in noncardiac or
             cardiac procedures. CONCLUSIONS: Although A1C is positively
             associated with perioperative glucose, it is not associated
             with increased 30-day mortality after controlling for
             glucose. Perioperative glucose predicts 30-day mortality,
             linearly in noncardiac and nonlinearly in cardiac
             procedures. This confirms that perioperative glucose control
             is related to surgical outcomes but that A1C, reflecting
             antecedent glycemia, is a less useful predictor.},
   Doi = {10.2337/dc17-2232},
   Key = {fds332810}
}
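
The generalized additive model named in this abstract is standard
enough to sketch. Below is a minimal R illustration using the mgcv
package on simulated data; the variable names and the simulated
U-shape are invented for illustration and this is not the Duke cohort
analysis.

    ## A GAM relating 30-day mortality to perioperative glucose, with a
    ## smooth term allowing the nonlinear (U-shaped) relationship the
    ## abstract describes. All data here are simulated.
    library(mgcv)
    set.seed(1)
    n <- 5000
    glucose <- runif(n, 80, 220)
    eta <- -4 + 0.0008 * (glucose - 140)^2          # U-shaped log-odds
    mort <- rbinom(n, 1, plogis(eta))
    fit <- gam(mort ~ s(glucose), family = binomial)
    plot(fit, trans = plogis, shade = TRUE)         # fitted risk curve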

@article{fds333225,
   Author = {Dunson, DB},
   Title = {Statistics in the big data era: Failures of the
             machine},
   Journal = {Statistics & Probability Letters},
   Volume = {136},
   Pages = {4-9},
   Publisher = {Elsevier BV},
   Year = {2018},
   Month = {May},
   url = {http://dx.doi.org/10.1016/j.spl.2018.02.028},
   Abstract = {© 2018 There is vast interest in automated methods for
             complex data analysis. However, there is a lack of
             consideration of (1) interpretability, (2) uncertainty
             quantification, (3) applications with limited training data,
             and (4) selection bias. Statistical methods can achieve
             (1)-(4) with a change in focus.},
   Doi = {10.1016/j.spl.2018.02.028},
   Key = {fds333225}
}

@article{fds333512,
   Author = {Zhang, Z and Descoteaux, M and Zhang, J and Girard, G and Chamberland,
             M and Dunson, D and Srivastava, A and Zhu, H},
   Title = {Mapping population-based structural connectomes.},
   Journal = {Neuroimage},
   Volume = {172},
   Pages = {130-145},
   Year = {2018},
   Month = {May},
   url = {http://dx.doi.org/10.1016/j.neuroimage.2017.12.064},
   Abstract = {Advances in understanding the structural connectomes of
             human brain require improved approaches for the
             construction, comparison and integration of high-dimensional
             whole-brain tractography data from a large number of
             individuals. This article develops a population-based
             structural connectome (PSC) mapping framework to address
             these challenges. PSC simultaneously characterizes a large
             number of white matter bundles within and across different
             subjects by registering different subjects' brains based on
             coarse cortical parcellations, compressing the bundles of
             each connection, and extracting novel connection weights. A
             robust tractography algorithm and streamline post-processing
             techniques, including dilation of gray matter regions,
             streamline cutting, and outlier streamline removal are
             applied to improve the robustness of the extracted
             structural connectomes. The developed PSC framework can be
             used to reproducibly extract binary networks, weighted
             networks and streamline-based brain connectomes. We apply
             the PSC to Human Connectome Project data to illustrate its
             application in characterizing normal variations and
             heritability of structural connectomes in healthy
             subjects.},
   Doi = {10.1016/j.neuroimage.2017.12.064},
   Key = {fds333512}
}

@article{fds335795,
   Author = {Johndrow, JE and Lum, K and Dunson, DB},
   Title = {Theoretical limits of microclustering for record
             linkage.},
   Journal = {Biometrika},
   Volume = {105},
   Number = {2},
   Pages = {431-446},
   Year = {2018},
   Month = {June},
   url = {http://dx.doi.org/10.1093/biomet/asy003},
   Abstract = {There has been substantial recent interest in record
             linkage, where one attempts to group the records pertaining
             to the same entities from one or more large databases that
             lack unique identifiers. This can be viewed as a type of
             microclustering, with few observations per cluster and a
             very large number of clusters. We show that the problem is
             fundamentally hard from a theoretical perspective and, even
             in idealized cases, accurate entity resolution is
             effectively impossible unless the number of entities is
             small relative to the number of records and/or the
             separation between records from different entities is
             extremely large. These results suggest conservatism in
             interpretation of the results of record linkage, support
             collection of additional data to more accurately
             disambiguate the entities, and motivate a focus on coarser
             inference. For example, results from a simulation study
             suggest that sometimes one may obtain accurate results for
             population size estimation even when fine-scale entity
             resolution is inaccurate.},
   Doi = {10.1093/biomet/asy003},
   Key = {fds335795}
}
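
A toy simulation can convey the separation condition described here.
The R sketch below, with invented parameters, shows how entity
resolution degrades when entity-level locations are not well separated
relative to record noise.

    ## Records are noisy copies of entity-level values; resolution is
    ## attempted by clustering into the true number of entities.
    ## Increase 'sep' to mimic the large-separation regime.
    set.seed(2)
    n_ent <- 200; rec_per <- 2; sep <- 1
    mu <- rnorm(n_ent, sd = sep)                 # entity locations
    id <- rep(seq_len(n_ent), each = rec_per)
    x  <- rnorm(n_ent * rec_per, mean = mu[id])  # observed records
    cl <- cutree(hclust(dist(x)), k = n_ent)
    ## fraction of entities whose records stay in a single cluster
    mean(tapply(seq_along(x), id, function(i) length(unique(cl[i])) == 1))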

@article{fds335794,
   Author = {Shterev, ID and Dunson, DB and Chan, C and Sempowski,
             GD},
   Title = {Bayesian Multi-Plate High-Throughput Screening of
             Compounds.},
   Journal = {Scientific Reports},
   Volume = {8},
   Number = {1},
   Pages = {9551},
   Year = {2018},
   Month = {June},
   url = {http://dx.doi.org/10.1038/s41598-018-27531-w},
   Abstract = {High-throughput screening of compounds (chemicals) is an
             essential part of drug discovery, involving thousands to
             millions of compounds, with the purpose of identifying
             candidate hits. Most statistical tools, including the
             industry standard B-score method, work on individual
             compound plates and do not exploit cross-plate correlation
             or statistical strength among plates. We present a new
             statistical framework for high-throughput screening of
             compounds based on Bayesian nonparametric modeling. The
             proposed approach is able to identify candidate hits from
             multiple plates simultaneously, sharing statistical strength
             among plates and providing more robust estimates of compound
             activity. It can flexibly accommodate arbitrary
             distributions of compound activities and is applicable to
             any plate geometry. The algorithm provides a principled
             statistical approach for hit identification and false
             discovery rate control. Experiments demonstrate significant
             improvements in hit identification sensitivity and
             specificity over the B-score and R-score methods, which are
             highly sensitive to threshold choice. These improvements are
             maintained at low hit rates. The framework is implemented as
             an efficient R extension package BHTSpack and is suitable
             for large scale data sets.},
   Doi = {10.1038/s41598-018-27531-w},
   Key = {fds335794}
}
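
For context, the B-score baseline referenced in this abstract is easy
to state: fit a two-way median polish to each plate and scale the
residuals by their median absolute deviation. A minimal R sketch on a
simulated plate follows; BHTSpack is the authors' implementation of
the Bayesian method and is not reproduced here.

    ## B-score for one simulated 16 x 24 plate: median polish removes
    ## row/column effects, residuals are scaled by MAD, and wells with
    ## large |score| are flagged as candidate hits.
    set.seed(3)
    plate <- matrix(rnorm(16 * 24), 16, 24)
    plate[5, 7] <- plate[5, 7] + 6               # one true hit
    mp <- medpolish(plate, trace.iter = FALSE)
    bscore <- mp$residuals / mad(mp$residuals)
    which(abs(bscore) > 3, arr.ind = TRUE)       # flagged wells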

@article{fds339305,
   Author = {Guhaniyogi, R and Qamar, S and Dunson, DB},
   Title = {Bayesian Conditional Density Filtering},
   Journal = {Journal of Computational and Graphical Statistics},
   Volume = {27},
   Number = {3},
   Pages = {657-672},
   Publisher = {Informa UK Limited},
   Year = {2018},
   Month = {July},
   url = {http://dx.doi.org/10.1080/10618600.2017.1422431},
   Abstract = {© 2018, © 2018 American Statistical Association, Institute
             of Mathematical Statistics, and Interface Foundation of
             North America. We propose a conditional density filtering
             (C-DF) algorithm for efficient online Bayesian inference.
             C-DF adapts MCMC sampling to the online setting, sampling
             from approximations to conditional posterior distributions
             obtained by propagating surrogate conditional sufficient
             statistics (a function of data and parameter estimates) as
             new data arrive. These quantities eliminate the need to
             store or process the entire dataset simultaneously and offer
             a number of desirable features. Often, these include a
             reduction in memory requirements and runtime and improved
             mixing, along with state-of-the-art parameter inference and
             prediction. These improvements are demonstrated through
             several illustrative examples including an application to
             high-dimensional compressed regression. In cases where the
             dimension of the model parameter does not grow with time, we
             also establish sufficient conditions under which C-DF
             samples converge to the target posterior distribution
             asymptotically as sampling proceeds and more data arrive.
             Supplementary materials of C-DF are available
             online.},
   Doi = {10.1080/10618600.2017.1422431},
   Key = {fds339305}
}
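
The propagation idea is concrete enough for a small sketch. The R code
below illustrates the flavor of C-DF in a deliberately simple normal
model: sufficient statistics are updated as batches arrive and Gibbs
draws use only those statistics, never the stored data. This is an
illustrative reduction, not the paper's general algorithm.

    ## Online Gibbs using running sufficient statistics (T1, T2, n).
    set.seed(4)
    T1 <- 0; T2 <- 0; n <- 0
    mu <- 0; tau <- 1                      # tau = 1/sigma^2
    for (batch in 1:50) {
      x  <- rnorm(20, mean = 2, sd = 1.5)  # newly arriving data
      T1 <- T1 + sum(x); T2 <- T2 + sum(x^2); n <- n + length(x)
      for (s in 1:10) {                    # Gibbs steps per batch
        mu  <- rnorm(1, T1 / n, sqrt(1 / (n * tau)))
        tau <- rgamma(1, n / 2, rate = (T2 - 2 * mu * T1 + n * mu^2) / 2)
      }
    }
    c(mu = mu, sigma = 1 / sqrt(tau))      # close to (2, 1.5)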

@article{fds339365,
   Author = {van den Boom, W and Mao, C and Schroeder, RA and Dunson,
             DB},
   Title = {Extrema-weighted feature extraction for functional
             data.},
   Journal = {Bioinformatics},
   Volume = {34},
   Number = {14},
   Pages = {2457-2464},
   Year = {2018},
   Month = {July},
   url = {http://dx.doi.org/10.1093/bioinformatics/bty120},
   Abstract = {Motivation: Although there is a rich literature on methods
             for assessing the impact of functional predictors, the focus
             has been on approaches for dimension reduction that do not
             suit certain applications. Examples of standard approaches
             include functional linear models, functional principal
             components regression and cluster-based approaches, such as
             latent trajectory analysis. This article is motivated by
             applications in which the dynamics in a predictor, across
             times when the value is relatively extreme, are particularly
             informative about the response. For example, physicians are
             interested in relating the dynamics of blood pressure
             changes during surgery to post-surgery adverse outcomes, and
             it is thought that the dynamics are more important when
             blood pressure is significantly elevated or lowered.
             Results: We propose a novel class of extrema-weighted
             feature (XWF) extraction models. Key components in defining
             XWFs include the marginal density of the predictor, a
             function up-weighting values at extreme quantiles of this
             marginal, and functionals characterizing local dynamics.
             Algorithms are proposed for fitting of XWF-based regression
             and classification models, and are compared with current
             methods for functional predictors in simulations and a blood
             pressure during surgery application. XWFs find features of
             intraoperative blood pressure trajectories that are
             predictive of postoperative mortality. By their nature, most
             of these features cannot be found by previous methods.
             Availability and implementation: The R package 'xwf' is
             available at the CRAN repository: https://cran.r-project.org/package=xwf.
             Supplementary information: Supplementary data are available
             at Bioinformatics online.},
   Doi = {10.1093/bioinformatics/bty120},
   Key = {fds339365}
}
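
The construction lends itself to a compact sketch. The R code below
computes one hypothetical extrema-weighted feature, combining the
three ingredients named in the abstract: the predictor's marginal CDF,
a weight up-weighting extreme quantiles, and a local-dynamics
functional (here |x'(t)|). The weight function and data are invented;
the CRAN package 'xwf' is the real implementation.

    ## One XWF-style feature for a single functional predictor x(t).
    xwf_feature <- function(x, t, Fhat) {
      w   <- function(u) pmax(u, 1 - u)^4      # up-weight both tails
      dx  <- diff(x) / diff(t)                 # local dynamics |x'(t)|
      mid <- (x[-1] + x[-length(x)]) / 2
      sum(w(Fhat(mid)) * abs(dx) * diff(t))    # weighted integral
    }
    set.seed(5)
    t <- seq(0, 1, length.out = 200)
    x <- cumsum(rnorm(200, sd = 0.1))          # toy trajectory
    xwf_feature(x, t, Fhat = ecdf(x))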

@article{fds338057,
   Author = {Srivastava, S and Li, C and Dunson, DB},
   Title = {Scalable Bayes via barycenter in Wasserstein
             space},
   Journal = {Journal of Machine Learning Research},
   Volume = {19},
   Pages = {1-35},
   Year = {2018},
   Month = {August},
   Abstract = {© 2018 Sanvesh Srivastava, Cheng Li and David B. Dunson.
             Divide-and-conquer based methods for Bayesian inference
             provide a general approach for tractable posterior inference
             when the sample size is large. These methods divide the data
             into smaller subsets, sample from the posterior distribution
             of parameters in parallel on all the subsets, and combine
             posterior samples from all the subsets to approximate the
             full data posterior distribution. The smaller size of any
             subset compared to the full data implies that posterior
             sampling on any subset is computationally more efficient
             than sampling from the true posterior distribution. Since
             the combination step takes negligible time relative to
             sampling, posterior computations can be scaled to massive
             data by dividing the full data into a sufficiently large
             number of data subsets. One such approach relies on the
             geometry of posterior distributions estimated across
             different subsets and combines them through their barycenter
             in a Wasserstein space of probability measures. We provide
             theoretical guarantees on the accuracy of approximation that
             are valid in many applications. We show that the geometric
             method approximates the full data posterior distribution
             better than its competitors across diverse simulations and
             reproduces known results when applied to a movie ratings
             database.},
   Key = {fds338057}
}
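
In one dimension the combination step has a closed form, which makes
the pipeline easy to sketch: the Wasserstein-2 barycenter of the
subset posteriors is obtained by averaging their quantile functions. A
minimal R illustration for a normal mean, using the usual stochastic
approximation in which each subset likelihood is raised to the K-th
power; a toy setting, not the authors' general algorithm.

    ## Divide, sample subset posteriors, combine by averaging quantiles.
    set.seed(6)
    n <- 10000; K <- 10
    x <- rnorm(n, mean = 1)
    subsets <- split(x, rep(1:K, length.out = n))
    draws <- sapply(subsets, function(xj) {
      m <- length(xj)                    # likelihood^K: variance 1/(K*m)
      rnorm(2000, mean = mean(xj), sd = sqrt(1 / (K * m)))
    })
    p <- (1:2000 - 0.5) / 2000
    barycenter <- rowMeans(apply(draws, 2, quantile, probs = p))
    c(mean(barycenter), sd(barycenter))  # approx. N(mean(x), 1/n)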

@article{fds340499,
   Author = {Duan, LL and Johndrow, JE and Dunson, DB},
   Title = {Scaling up data augmentation MCMC via calibration},
   Journal = {Journal of Machine Learning Research},
   Volume = {19},
   Year = {2018},
   Month = {October},
   Abstract = {© 2018 Leo L. Duan, James E. Johndrow and David B. Dunson.
             There has been considerable interest in making Bayesian
             inference more scalable. In big data settings, most of the
             focus has been on reducing the computing time per iteration
             rather than reducing the number of iterations needed in
             Markov chain Monte Carlo (MCMC). This article considers data
             augmentation MCMC (DA-MCMC), a widely used technique.
             DA-MCMC samples tend to become highly autocorrelated in
             large samples, due to a mis-calibration problem in which
             conditional posterior distributions given augmented data are
             too concentrated. This makes it necessary to collect very
             long MCMC paths to obtain acceptably low MC error. To combat
             this inefficiency, we propose a family of calibrated data
             augmentation algorithms, which appropriately adjust the
             variance of conditional posterior distributions. A
             Metropolis-Hastings step is used to eliminate bias in the
             stationary distribution of the resulting sampler. Compared
             to existing alternatives, this approach can dramatically
             reduce MC error by reducing autocorrelation and increasing
             the effective number of DA-MCMC samples per unit of
             computing time. The approach is simple and applicable to a
             broad variety of existing data augmentation algorithms. We
             focus on three popular generalized linear models: probit,
             logistic and Poisson log-linear. Dramatic gains in
             computational efficiency are shown in applications.},
   Key = {fds340499}
}
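
The uncalibrated baseline this paper improves on is the classic
Albert-Chib data-augmentation sampler for probit regression, sketched
below in R on simulated data. Calibration would adjust the variance of
the conditional for the latent variable and add a Metropolis-Hastings
accept/reject step; that correction is omitted here.

    ## Albert-Chib DA for probit: latent z | beta, y is truncated
    ## normal; beta | z is normal (flat prior).
    set.seed(7)
    n <- 500
    X <- cbind(1, rnorm(n))
    y <- rbinom(n, 1, pnorm(X %*% c(-1, 2)))
    V <- solve(crossprod(X)); L <- chol(V)
    beta <- c(0, 0); keep <- matrix(NA_real_, 2000, 2)
    for (s in 1:2000) {
      eta <- drop(X %*% beta)
      plo <- pnorm(-eta)                       # P(z <= 0)
      u   <- runif(n)
      z   <- eta + qnorm(ifelse(y == 1, plo + u * (1 - plo), u * plo))
      beta <- drop(V %*% crossprod(X, z)) + drop(rnorm(2) %*% L)
      keep[s, ] <- beta
    }
    colMeans(keep[-(1:500), ])                 # close to (-1, 2)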

@article{fds335793,
   Author = {Sarkar, A and Chabout, J and Macopson, JJ and Jarvis, ED and Dunson,
             DB},
   Title = {Bayesian Semiparametric Mixed Effects Markov Models With
             Application to Vocalization Syntax},
   Journal = {Journal of the American Statistical Association},
   Volume = {113},
   Number = {524},
   Pages = {1515-1527},
   Publisher = {Informa UK Limited},
   Year = {2018},
   Month = {October},
   url = {http://dx.doi.org/10.1080/01621459.2018.1423986},
   Abstract = {© 2018, © 2018 American Statistical Association. Studying
             the neurological, genetic, and evolutionary basis of human
             vocal communication mechanisms using animal vocalization
             models is an important field of neuroscience. The datasets
             typically comprise structured sequences of syllables or
             “songs” produced by animals from different genotypes
             under different social contexts. It has been difficult to
             come up with sophisticated statistical methods that
             appropriately model animal vocal communication syntax. We
             address this need by developing a novel Bayesian
             semiparametric framework for inference in such datasets. Our
             approach is built on a novel class of mixed effects Markov
             transition models for the songs that accommodate exogenous
             influences of genotype and context as well as
             animal-specific heterogeneity. Crucial advantages of the
             proposed approach include its ability to provide insights
             into key scientific queries related to global and local
             influences of the exogenous predictors on the transition
             dynamics via automated tests of hypotheses. The methodology
             is illustrated using simulation experiments and the
             aforementioned motivating application in neuroscience.
             Supplementary materials for this article, including a
             standardized description of the materials available for
             reproducing the work, are available as an online
             supplement.},
   Doi = {10.1080/01621459.2018.1423986},
   Key = {fds335793}
}
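
The data structure here, sequences of syllables whose transition
dynamics vary with genotype and context, is easy to make concrete. A
minimal R sketch of the empirical starting point: a first-order
transition probability matrix for one toy song, with no mixed effects
or covariates.

    ## Empirical first-order transition matrix for a syllable sequence.
    set.seed(8)
    syll  <- sample(c("A", "B", "C", "D"), 300, replace = TRUE)
    trans <- table(from = head(syll, -1), to = tail(syll, -1))
    prop.table(trans, margin = 1)        # row-stochastic estimates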

@article{fds340937,
   Author = {Zhao, S and Engelhardt, BE and Mukherjee, S and Dunson,
             DB},
   Title = {Fast Moment Estimation for Generalized Latent Dirichlet
             Models},
   Journal = {Journal of the American Statistical Association},
   Volume = {113},
   Number = {524},
   Pages = {1528-1540},
   Year = {2018},
   Month = {October},
   url = {http://dx.doi.org/10.1080/01621459.2017.1341839},
   Abstract = {© 2018, © 2018 American Statistical Association. We
             develop a generalized method of moments (GMM) approach for
             fast parameter estimation in a new class of Dirichlet latent
             variable models with mixed data types. Parameter estimation
             via GMM has computational and statistical advantages over
             alternative methods, such as expectation maximization,
             variational inference, and Markov chain Monte Carlo. A key
             computational advantage of our method, Moment Estimation for
             latent Dirichlet models (MELD), is that parameter estimation
             does not require instantiation of the latent variables.
             Moreover, performance is agnostic to distributional
             assumptions of the observations. We derive population moment
             conditions after marginalizing out the sample-specific
             Dirichlet latent variables. The moment conditions only
             depend on component mean parameters. We illustrate the
             utility of our approach on simulated data, comparing results
             from MELD to alternative methods, and we show the promise of
             our approach through the application to several
             datasets. Supplementary materials for this article are
             available online.},
   Doi = {10.1080/01621459.2017.1341839},
   Key = {fds340937}
}

@article{fds341344,
   Author = {Canale, A and Durante, D and Dunson, DB},
   Title = {Convex mixture regression for quantitative risk
             assessment.},
   Journal = {Biometrics},
   Volume = {74},
   Number = {4},
   Pages = {1331-1340},
   Year = {2018},
   Month = {December},
   url = {http://dx.doi.org/10.1111/biom.12917},
   Abstract = {There is wide interest in studying how the distribution of a
             continuous response changes with a predictor. We are
             motivated by environmental applications in which the
             predictor is the dose of an exposure and the response is a
             health outcome. A main focus in these studies is inference
             on dose levels associated with a given increase in risk
             relative to a baseline. In addressing this goal, popular
             methods either dichotomize the continuous response or focus
             on modeling changes with the dose in the expectation of the
             outcome. Such choices may lead to information loss and
             provide inaccurate inference on dose-response relationships.
             We instead propose a Bayesian convex mixture regression
             model that allows the entire distribution of the health
             outcome to be unknown and changing with the dose. To balance
             flexibility and parsimony, we rely on a mixture model for
             the density at the extreme doses, and express the
             conditional density at each intermediate dose via a convex
             combination of these extremal densities. This representation
             generalizes classical dose-response models for quantitative
             outcomes, and provides a more parsimonious, but still
             powerful, formulation compared to nonparametric methods,
             thereby improving interpretability and efficiency in
             inference on risk functions. A Markov chain Monte Carlo
             algorithm for posterior inference is developed, and the
             benefits of our methods are outlined in simulations, along
             with a study on the impact of DDE exposure on gestational
             age.},
   Doi = {10.1111/biom.12917},
   Key = {fds341344}
}
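
The convex-combination representation can be written down directly. A
small R sketch with fixed extremal densities and a fixed monotone
weight, all invented for illustration; in the paper these components
are assigned priors and inferred from data.

    ## Density at dose d interpolates between extremal densities f0, f1;
    ## the risk of a low outcome follows by integration.
    f0 <- function(y) dnorm(y, mean = 40, sd = 2)   # baseline dose
    f1 <- function(y) dnorm(y, mean = 35, sd = 3)   # maximal dose
    w  <- function(d) d^2                           # monotone in d
    fd <- function(y, d) (1 - w(d)) * f0(y) + w(d) * f1(y)
    risk <- function(d) integrate(fd, -Inf, 37, d = d)$value
    sapply(c(0, 0.25, 0.5, 0.75, 1), risk)          # risk vs dose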

@article{fds342829,
   Author = {Zhang, Z and Descoteaux, M and Dunson, DB},
   Title = {Nonparametric Bayes Models of Fiber Curves Connecting Brain
             Regions},
   Journal = {Journal of the American Statistical Association},
   Year = {2019},
   Month = {January},
   url = {http://dx.doi.org/10.1080/01621459.2019.1574582},
   Abstract = {© 2019, © 2019 American Statistical Association. In
             studying structural inter-connections in the human brain, it
             is common to first estimate fiber bundles connecting
             different regions relying on diffusion MRI. These fiber
             bundles act as highways for neural activity. Current
             statistical methods reduce the rich information into an
             adjacency matrix, with the elements containing a count of
             fibers or a mean diffusion feature along the fibers. The
             goal of this article is to avoid discarding the rich
             geometric information of fibers, developing flexible models
             for characterizing the population distribution of fibers
             between brain regions of interest within and across
             different individuals. We start by decomposing each fiber
             into a rotation matrix, shape and translation from a global
             reference curve. These components are viewed as data lying
             on a product space composed of different Euclidean spaces
             and manifolds. To nonparametrically model the distribution
             within and across individuals, we rely on a hierarchical
             mixture of product kernels specific to the component spaces.
             Taking a Bayesian approach to inference, we develop
             efficient methods for posterior sampling. The approach
             automatically produces clusters of fibers within and across
             individuals. Applying the method to Human Connectome Project
             data, we find interesting relationships between brain fiber
             geometry and reading ability. Supplementary materials for
             this article, including a standardized description of the
             materials available for reproducing the work, are available
             as an online supplement.},
   Doi = {10.1080/01621459.2019.1574582},
   Key = {fds342829}
}
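
The decomposition step described above, splitting each fiber into a
translation, rotation, and shape relative to a reference curve, can be
sketched with an orthogonal Procrustes alignment. A minimal R version
on toy 3-d curves; the reflection check of the full Kabsch algorithm
is omitted.

    ## Decompose a fiber into translation + rotation + aligned shape.
    align_fiber <- function(Fib, Ref) {          # n x 3 matrices
      tr <- colMeans(Fib)
      Fc <- sweep(Fib, 2, tr)
      Rc <- sweep(Ref, 2, colMeans(Ref))
      sv <- svd(crossprod(Fc, Rc))               # orthogonal Procrustes
      Rot <- sv$u %*% t(sv$v)
      list(translation = tr, rotation = Rot, shape = Fc %*% Rot)
    }
    s   <- seq(0, 1, length.out = 50)
    Ref <- cbind(cos(s), sin(s), s)              # reference curve
    Rz  <- matrix(c(0, -1, 0, 1, 0, 0, 0, 0, 1), 3)  # 90-degree rotation
    Fib <- Ref %*% Rz + 5                        # rotated, shifted copy
    str(align_fiber(Fib, Ref))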

@article{fds342828,
   Author = {Wang, L and Zhang, Z and Dunson, D},
   Title = {Common and individual structure of brain
             networks},
   Journal = {The Annals of Applied Statistics},
   Volume = {13},
   Number = {1},
   Pages = {85-112},
   Year = {2019},
   Month = {January},
   url = {http://dx.doi.org/10.1214/18-AOAS1193},
   Abstract = {© Institute of Mathematical Statistics, 2019. This article
             focuses on the problem of studying shared- and
             individual-specific structure in replicated networks or
             graph-valued data. In particular, the observed data consist
             of n graphs, G_i, i = 1, ..., n, with each graph
             consisting of a collection of edges between V nodes. In
             brain connectomics, the graph for an individual corresponds
             to a set of interconnections among brain regions. Such data
             can be organized as a V × V binary adjacency matrix A_i for
             each i, with ones indicating an edge between a pair of nodes
             and zeros indicating no edge. When nodes have a shared
             meaning across replicates i = 1, ..., n, it becomes of
             substantial interest to study similarities and differences
             in the adjacency matrices. To address this problem, we
             propose a method to estimate a common structure and
             low-dimensional individual-specific deviations from
             replicated networks. The proposed Multiple GRAph
             Factorization (M-GRAF) model relies on a logistic regression
             mapping combined with a hierarchical eigenvalue
             decomposition. We develop an efficient algorithm for
             estimation and study basic properties of our approach.
             Simulation studies show excellent operating characteristics
             and we apply the method to human brain connectomics
             data.},
   Doi = {10.1214/18-AOAS1193},
   Key = {fds342828}
}
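
The model structure is simple to exhibit from the generative side:
each subject's edge probabilities combine a common logit matrix with a
low-rank, subject-specific deviation. A small R simulation sketch; the
paper's contribution is the estimation algorithm, which is not shown.

    ## Simulate n binary networks: common structure Z plus rank-K
    ## individual deviations, mapped through a logistic link.
    set.seed(9)
    V <- 20; K <- 2; n <- 5
    Z <- matrix(rnorm(V^2, sd = 0.5), V, V); Z <- (Z + t(Z)) / 2
    A <- lapply(1:n, function(i) {
      Q <- matrix(rnorm(V * K), V, K)
      D <- Q %*% diag(rnorm(K), K) %*% t(Q)      # low-rank deviation
      a <- matrix(rbinom(V^2, 1, plogis(Z + D)), V, V)
      a[lower.tri(a)] <- t(a)[lower.tri(a)]      # symmetrize
      diag(a) <- 0
      a
    })
    str(A[[1]])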

@article{fds342197,
   Author = {Wang, L and Zhang, Z and Dunson, D},
   Title = {Symmetric Bilinear Regression for Signal Subgraph
             Estimation},
   Journal = {IEEE Transactions on Signal Processing},
   Volume = {67},
   Number = {7},
   Pages = {1929-1940},
   Year = {2019},
   Month = {April},
   url = {http://dx.doi.org/10.1109/TSP.2019.2899818},
   Abstract = {© 1991-2012 IEEE. There is an increasing interest in
             learning a set of small outcome-relevant subgraphs in
             network-predictor regression. The extracted signal subgraphs
             can greatly improve the interpretation of the association
             between the network predictor and the response. In brain
             connectomics, the brain network for an individual
             corresponds to a set of interconnections among brain regions
             and there is a strong interest in linking the brain
             connectome to human cognitive traits. Modern neuroimaging
             technology allows a very fine segmentation of the brain,
             producing very large structural brain networks. Therefore,
             accurate and efficient methods for identifying a set of
             small predictive subgraphs become crucial, leading to
             discovery of key interconnected brain regions related to the
             trait and important insights on the mechanism of variation
             in human cognitive traits. We propose a symmetric bilinear
             model with an $L_1$ penalty to search for small clique
             subgraphs that contain useful information about the
             response. A coordinate descent algorithm is developed to
             estimate the model where we derive analytical solutions for
             a sequence of conditional convex optimizations. Application
             of this method on human connectome and language
             comprehension data shows interesting discovery of relevant
             interconnections among several small sets of brain regions
             and better predictive performance than competitors.},
   Doi = {10.1109/TSP.2019.2899818},
   Key = {fds342197}
}

@article{fds342827,
   Author = {Zhang, Z and Allen, GI and Zhu, H and Dunson, D},
   Title = {Tensor network factorizations: Relationships between brain
             structural connectomes and traits.},
   Journal = {Neuroimage},
   Volume = {197},
   Pages = {330-343},
   Year = {2019},
   Month = {April},
   url = {http://dx.doi.org/10.1016/j.neuroimage.2019.04.027},
   Abstract = {Advanced brain imaging techniques make it possible to
             measure individuals' structural connectomes in large cohort
             studies non-invasively. Given the availability of large
             scale data sets, it is extremely interesting and important
             to build a set of advanced tools for structural connectome
             extraction and statistical analysis that emphasize both
             interpretability and predictive power. In this paper, we
             developed and integrated a set of toolboxes, including an
             advanced structural connectome extraction pipeline and a
             novel tensor network principal components analysis (TN-PCA)
             method, to study relationships between structural
             connectomes and various human traits such as alcohol and
             drug use, cognition and motion abilities. The structural
             connectome extraction pipeline produces a set of connectome
             features for each subject that can be organized as a tensor
             network, and TN-PCA maps the high-dimensional tensor network
             data to a lower-dimensional Euclidean space. Combined with
             classical hypothesis testing, canonical correlation analysis
             and linear discriminant analysis techniques, we analyzed
             over 1100 scans of 1076 subjects from the Human Connectome
             Project (HCP) and the Sherbrooke test-retest data set, as
             well as 175 human traits measuring different domains
             including cognition, substance use, motor, sensory and
             emotion. The test-retest data validated the developed
             algorithms. With the HCP data, we found that structural
             connectomes are associated with a wide range of traits,
             e.g., fluid intelligence, language comprehension, and motor
             skills are associated with increased cortical-cortical brain
             structural connectivity, while the use of alcohol, tobacco,
             and marijuana are associated with decreased
             cortical-cortical connectivity. We also demonstrated that
             our extracted structural connectomes and analysis method can
             give superior prediction accuracies compared with
             alternative connectome constructions and other tensor and
             network regression methods.},
   Doi = {10.1016/j.neuroimage.2019.04.027},
   Key = {fds342827}
}

@article{fds342830,
   Author = {Niu, M and Cheung, P and Lin, L and Dai, Z and Lawrence, N and Dunson,
             D},
   Title = {Intrinsic Gaussian processes on complex constrained
             domains},
   Journal = {Journal of the Royal Statistical Society: Series B
             (Statistical Methodology)},
   Volume = {81},
   Number = {3},
   Pages = {603-627},
   Year = {2019},
   Month = {July},
   url = {http://dx.doi.org/10.1111/rssb.12320},
   Abstract = {© 2019 Royal Statistical Society We propose a class of
             intrinsic Gaussian processes (GPs) for interpolation,
             regression and classification on manifolds with a primary
             focus on complex constrained domains or irregularly shaped
             spaces arising as subsets or submanifolds of R, R^2, R^3 and
             beyond. For example, intrinsic GPs can accommodate spatial
             domains arising as complex subsets of Euclidean space.
             Intrinsic GPs respect the potentially complex boundary or
             interior conditions as well as the intrinsic geometry of the
             spaces. The key novelty of the approach proposed is to
             utilize the relationship between heat kernels and the
             transition density of Brownian motion on manifolds for
             constructing and approximating valid and computationally
             feasible covariance kernels. This enables intrinsic GPs to
             be practically applied in great generality, whereas existing
             approaches for smoothing on constrained domains are limited
             to simple special cases. The broad utilities of the
             intrinsic GP approach are illustrated through simulation
             studies and data examples.},
   Doi = {10.1111/rssb.12320},
   Key = {fds342830}
}
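
The key construction, covariance kernels built from the transition
density of Brownian motion on the domain, admits a compact numerical
sketch. Below is an R toy version on the interval [0, 1] with a
reflecting boundary; complex 2-d or 3-d domains are handled the same
way, with increments rejected or reflected at the boundary.

    ## Approximate heat-kernel values k(x0, y, t) by simulating Brownian
    ## paths that reflect at the domain boundary and estimating the
    ## density of their endpoints.
    heat_kernel <- function(x0, y, t, n_path = 20000, n_step = 100) {
      dt <- t / n_step
      z  <- rep(x0, n_path)
      for (s in 1:n_step) {
        z <- z + rnorm(n_path, sd = sqrt(dt))
        z <- abs(z); z <- 1 - abs(1 - z)     # reflect at 0 and 1
      }
      approxfun(density(z, from = 0, to = 1))(y)
    }
    heat_kernel(0.2, c(0.2, 0.5, 0.9), t = 0.05)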


%% Papers Submitted   
@article{fds70573,
   Author = {Cai, B and Dunson, DB},
   Title = {Variable selection in nonparametric random effects
             models},
   Journal = {submitted},
   Year = {2007},
   Key = {fds70573}
}

@article{fds70581,
   Author = {Wang, L and Dunson, DB},
   Title = {Bayesian isotonic density regression},
   Year = {2007},
   Key = {fds70581}
}

@article{fds151355,
   Author = {Mitra, R and Dunson, DB},
   Title = {Two level stochastic search variable selection in GLMs with
             missing predictors},
   Year = {2008},
   Key = {fds151355}
}


%% Chapters   
@misc{fds340365,
   Author = {Weinberg, CR and Dunson, DB},
   Title = {Some issues in assessing human fertility},
   Pages = {42-49},
   Booktitle = {Statistics in the 21st Century},
   Year = {2001},
   Month = {January},
   ISBN = {1584882727},
   Abstract = {© 2002 by American Statistical Association. One of the
             pleasures of working as an applied statistician is the
             awareness it brings of the wide diversity of scientific
             fields to which our profession contributes critical concepts
             and methods. My own awareness was enhanced by accepting the
             invitation from the editors of JASA to serve as guest editor
             for this section of vignettes celebrating the significant
             contributions made by statisticians to the life and medical
             sciences in the 20th century. The goal of the project was
             not an encyclopedic catalog of all the major developments,
             but rather a sampling of some of the most interesting work.
             Of the 12 vignettes, 10 focus on particular areas of
             application: environmetrics, wildlife populations, animal
             breeding, human fertility, toxicology, medical diagnosis,
             clinical trials, environmental epidemiology, statistical
             genetics, and molecular biology. The two vignettes that
             begin the series focus more on methods that have had, or
             promise to have, impact across a range of subject matter
             areas: survival analysis and causal analysis.},
   Key = {fds340365}
}

@misc{fds257825,
   Author = {Dunson, DB and Bhattacharya, A and Griffin, JE},
   Title = {Nonparametric Bayes Regression and Classification Through
             Mixtures of Product Kernels},
   Volume = {9780199694587},
   Pages = {145-164},
   Booktitle = {Bayesian Statistics 9},
   Publisher = {Oxford University Press},
   Year = {2012},
   Month = {January},
   ISBN = {9780199694587},
   url = {http://dx.doi.org/10.1093/acprof:oso/9780199694587.003.0005},
   Abstract = {© Oxford University Press 2011. All rights reserved. It is
             routine in many fields to collect data having a variety of
             measurement scales and supports. For example, in biomedical
             studies for each patient one may collect functional data on
             a biomarker over time, gene expression values normalized to
             lie on a hypersphere to remove artifacts, clinical and
             demographic covariates and a health outcome. A common
             interest focuses on building predictive models, with
             parametric assumptions seldom supported by prior knowledge.
             Hence, it is most appropriate to define a prior with large
             support allowing the conditional distribution of the
             response given predictors to be unknown and changing
             flexibly across the predictor space not just in the mean but
             also in the variance and shape. Building on earlier work on
             Dirichlet process mixtures, we describe a simple and general
             strategy for inducing models for conditional distributions
             through discrete mixtures of product kernel models for joint
             distributions of predictors and response variables.
             Computation is straightforward and the approach can easily
             accommodate combining of widely disparate data types,
             including vector data in a Euclidean space, categorical
             observations, functions, images and manifold
             data.},
   Doi = {10.1093/acprof:oso/9780199694587.003.0005},
   Key = {fds257825}
}

 
