@Article{Cho_etal98,
  author   = {Cho, R. J. and Campbell, M. J. and Winzeler, E. A. and
              Steinmetz, L. and Conway, A. and Wodicka, L. and
              Wolfsberg, T. G. and Gabrielian, A. E. and Landsman, D. and
              Lockhart, D. J. and Davis, R. W.},
  title    = {A genome-wide transcriptional analysis of the mitotic cell cycle},
  journal  = {Mol Cell},
  volume   = {2},
  number   = {1},
  pages    = {65--73},
  month    = jul,
  year     = {1998},
  abstract = {Progression through the eukaryotic cell cycle is known to be both
regulated and accompanied by periodic fluctuation in the expression levels
of numerous genes. We report here the genome-wide characterization of mRNA
transcript levels during the cell cycle of the budding yeast S. cerevisiae.
Cell cycle-dependent periodicity was found for 416 of the 6220 monitored
transcripts. More than 25\% of the 416 genes were found directly adjacent
to other genes in the genome that displayed induction in the same cell
cycle phase, suggesting a mechanism for local chromosomal organization in
global mRNA regulation. More than 60\% of the characterized genes that
displayed mRNA fluctuation have already been implicated in cell cycle
period-specific biological roles. Because more than 20\% of human proteins
display significant homology to yeast proteins, these results also link a
range of human genes to cell cycle period-specific biological functions.},
  keyword  = {Cell Cycle | Chromosome Mapping | Chromosomes, Fungal/*genetics |
DNA, Fungal/genetics | *Gene Expression Regulation, Fungal | *Genome,
Fungal | Mitosis/*genetics | RNA, Fungal/*biosynthesis/genetics | RNA,
Messenger/*biosynthesis/genetics | Saccharomyces
cerevisiae/cytology/*genetics/metabolism | Support, U.S. Gov't, P.H.S. |
*Transcription, Genetic | 1998/08/14 00:01},
  address  = {Department of Genetics, Stanford University School of Medicine,
California 94305, USA.},
  language = {english},
  pmid     = {9702192},
}

@Article{Hart_et2005,
  author   = {Hart, C. E. and Sharenbroich, L. and Bornstein, B. J. and
              Trout, D. and King, B. and Mjolsness, E. and Wold, B. J.},
  title    = {A mathematical and computational framework for quantitative comparison and integration of large-scale gene expression data},
  journal  = {Nucleic Acids Research},
  volume   = {33},
  number   = {8},
  pages    = {2580--2594},
  month    = may,
  year     = 2005,
  address  = {Division of Biology, California Institute of Technology, Pasadena, CA 91125, USA.},
  abstract = {Analysis of large-scale gene expression studies usually begins with gene clustering. A ubiquitous problem is that different algorithms applied to the same data inevitably give different results, and the differences are often substantial, involving a quarter or more of the genes analyzed. This raises a series of important but nettlesome questions: How are different clustering results related to each other and to the underlying data structure? Is one clustering objectively superior to another? Which differences, if any, are likely candidates to be biologically important? A systematic and quantitative way to address these questions is needed, together with an effective way to integrate and leverage expression results with other kinds of large-scale data and annotations. We developed a mathematical and computational framework to help quantify, compare, visualize and interactively mine clusterings. We show that by coupling confusion matrices with appropriate metrics (linear assignment and normalized mutual information scores), one can quantify and map differences between clusterings. A version of receiver operator characteristic analysis proved effective for quantifying and visualizing cluster quality and overlap. These methods, plus a flexible library of clustering algorithms, can be called from a new expandable set of software tools called CompClust 1.0 (http://woldlab.caltech.edu/compClust/). CompClust also makes it possible to relate expression clustering patterns to DNA sequence motif occurrences, protein-DNA interaction measurements and various kinds of functional annotations. Test analyses used yeast cell cycle data and revealed data structure not obvious under all algorithms. These results were then integrated with transcription motif and global protein-DNA interaction data to identify G1 regulatory modules.},
  pmid     = {15886390},
}


@Article{Roden_et2005,
  author   = {Roden, J. and King, B. and Trout, D. and Wold, B. and Hart, C. E.},
  title    = {Mining Gene Expression Data by Interpreting Principal Components},
  journal  = {BMC Bioinformatics},
  month    = jul,
  year     = 2005,
  note     = {In press},
  address  = {Division of Biology, California Institute of Technology, Pasadena, CA 91125, USA.},
  abstract = {Bioinformatics research aimed at analyzing microarray data has emphasized various algorithms that group together genes having similar patterns of expression over all conditions tested.  However, in many instances the biologically important goal is to identify relatively small sets of genes that share coherent expression across only some conditions, rather than all or most conditions as required in traditional clustering; e.g. genes that are highly up-regulated and/or down-regulated similarly across only a subset of conditions.  Equally important is the need to learn which conditions are the decisive ones in forming such gene sets of interest, and how they relate to diverse conditional covariates, such as disease diagnosis or prognosis.
We present a method for automatically identifying such candidate sets of biologically relevant genes using a combination of principal components analysis and information theoretic metrics.  To enable easy use of our methods, we have developed a data analysis package that facilitates visualization and subsequent data mining of the independent sources of significant variation present in gene microarray expression datasets (or in any other similarly structured high-dimensional dataset).  Using these methods the genes most affected by specific subsets of conditions (e.g. tissues, treatments, samples, etc.) are highlighted, guiding more informed hypothesis as to what might be driving the variation.
We provide an unsupervised data mining technique for diverse microarray expression datasets that is distinct from major methods now in routine use.  In test uses, it independently confirmed other methods by identifying biologically relevant genes that were identified by other techniques, such as support vector machines.  It has proven to be especially valuable in instances where there are many diverse conditions (10's to hundreds of different tissues or cell types), a situation in which many clustering or ordering algorithms become problematical.  This approach also shows promise in other topic domains such as multispectral imaging datasets.},
}

@Article{Mootha_etal03,
  author  = {Mootha, V. K. and
             Lindgren, C. M. and
             Eriksson, K. F. and
             Subramanian, A. and
             Sihag, S. and
             Lehar, J. and
             Puigserver, P. and
             Carlsson, E. and
             Ridderstr{\aa}le, M. and
             Laurila, E. and
             Houstis, N. and
             Daly, M. J. and
             Patterson, N. and
             Mesirov, J. P. and
             Golub, T. R. and
             Tamayo, P. and
             Spiegelman, B. and
             Lander, E. S. and
             Hirschhorn, J. N. and
             Altshuler, D. and
             Groop, L. C.},
  title   = {{PGC-1$\alpha$-responsive} genes involved in oxidative phosphorylation are coordinately downregulated in human diabetes},
  journal = {Nat Genet},
  volume  = {34},
  number  = {3},
  pages   = {267--273},
  month   = jun,
  year    = {2003},
}