\documentclass[10pt]{article}
\usepackage[hmargin=1.5cm,top=2cm,bottom=2cm]{geometry}
\usepackage{multicol}
\setlength\columnsep{15pt}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{array}
\usepackage{booktabs}
\usepackage{tabularx}
\usepackage[auth-sc]{authblk}
\usepackage{longtable}
\usepackage{multirow}
\usepackage{hyperref}
\usepackage{enumerate}
\usepackage[labelfont=bf]{caption}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage{mdframed}
\usepackage{graphics}
\usepackage{multirow}
\usepackage{rotating}
\usepackage{array}
\usepackage{lscape}
\usepackage{caption}
\usepackage{breakurl}
\usepackage{todonotes}
\usepackage{hanging}
\usepackage{pagecolor}
\usepackage[final]{pdfpages}
\usepackage[leftFloats,CaptionBefore]{fltpage}
\usepackage[numbers,super,sort&compress]{natbib}
\setlength{\bibsep}{3pt}
\usepackage{abstract}
\usepackage{enumitem}
\usepackage{soul}
\usepackage{titlesec}
\titleformat{\section}[block]{\large\bfseries\filcenter}{\thesection.}{0.4em}{}
\titleformat{\subsection}[block]{\normalsize\sc\bfseries\filcenter}{\thesubsection.}{0.4em}{}
\setcounter{secnumdepth}{5}
% \usepackage{fancyhdr}
% \pagestyle{fancy}
% \renewcommand{\sectionmark}[1]{
% \markright{\thesection\ #1}}
% \fancyhf{} % delete current header and footer
% \fancyhead[CO]{Third-generation sequencing and functional genomics}
% \fancyhead[CE]{Georgi K. Marinov}
% \fancyhead[RO,LE]{\thepage}
% \fancyhead[LO]{\bfseries\rightmark}
% \fancyhead[RE]{\bfseries\leftmark}
% \renewcommand{\headrulewidth}{0pt}
% \renewcommand{\footrulewidth}{0pt}
% \addtolength{\headheight}{0.5pt} % space for the rule
% \fancypagestyle{plain}{%
% }

\newcommand{\filllastline}[1]{
\setlength\leftskip{0pt}
\setlength\rightskip{0pt}
\setlength\parfillskip{0pt}
#1}

\makeatletter
\def\@biblabel#1{\@ifnotempty{#1}{#1.}}
\makeatother

\newenvironment{Figure}
{\par\medskip\noindent\minipage{\linewidth}}
{\endminipage\par\medskip}

\title{\bf Enhancer activity and biochemical signatures in ENCODE Encyclopedia Candidate Regulatory Elements}
\renewcommand\Authfont{\scshape\normalsize}
%\author[*]{ALL OTHER AUTHORS I'VE MISSED}
\author[1]{Gilberto DeSalvo}
\author[6]{Georgi K. Marinov}
\author[2]{Christopher Partridge}
\author[4,7]{Christopher M. Vockley}
\author[3]{Nergiz Dogan}
\author[8,9]{Ricardo Ramirez}
%\author[1]{Gordon Kwan}
\author[4,5]{Timothy E. Reddy}
\author[8,9]{Ali Mortazavi}
\author[3]{Ross C. Hardison}
\author[2]{Richard M. Myers}
\author[1]{Barbara J. Wold}
\renewcommand\Affilfont{\itshape\normalsize}
%\affil[*]{other affiliations}
\affil[1]{Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, California, USA.}
\affil[2]{HudsonAlpha Institute for Biotechnology, 601 Genome Way, Huntsville, AL 35806, USA}
\affil[3]{Dept. of Biochemistry and Molecular Biology, Penn State University, 304 Wartik Laboratory, University Park, PA 16802, USA}
\affil[4]{Center for Genomic \& Computational Biology, Duke University, Durham, NC 27708, USA}
\affil[5]{Department of Biostatistics \& Bioinformatics, Duke University, Durham, NC 27708, USA.}
\affil[6]{Independent researcher; Zahari Zograf 24 V 8, Pleven, Bulgaria}
\affil[7]{Department of Cell Biology, Duke University, Durham, NC 27708, USA}
\affil[8]{Department of Developmental and Cell Biology, University of California Irvine, Irvine, CA 92697-2300, USA}
\affil[9]{Center for Complex Biological Systems, University of California Irvine, Irvine, CA 92697-2280, USA}
\date{}

\begin{document}
\maketitle

% \let\thefootnote\relax\footnotetext{Corresponding author:
% Georgi K. Marinov. Division of Biology and Biological Engineering, California Institute of Technology. Pasadena, CA 91125.}
% \centerline{}
% \centerline{}
\begin{abstract}
\noindent {\normalsize \textbf{An aspiration for functional genomics is to identify all cis-acting regulatory elements in the genome and define their activities.  Genome-wide transcription factor occupancy and chromatin state profiles are widely used to identify candidate regulatory elements (cREs), although accurately predicting their varied regulatory activities and cell type specificities has proved surprisingly difficult. Multiple large-scale efforts, including ENCODE and Epigenomic Roadmap projects, have generated rich standardized biochemical data from large numbers of diverse cell lines, tissues and developmental stages for human and mouse. These data collections are driving attempts to catalog the global cRE repertoire, including a new ENCODE Encyclopedia of Elements V1.0. In light of these expanded resources, we aimed to uniformly analyze a collection of pertinent enhancer tests for hundreds of individual constructs in five cell types, including both progenitor cell states and mature differentiated cell systems.  The elements were drawn from both TSS-proximal and far-distal locations; were designed to include the full-length cREs; and were selected using criteria that ranged from simple single-TF occupancy to integrative machine learning models. A few shared qualitative and quantitative themes emerged: roughly 50\% of elements occupied by at least one TF and possessing the basic ENCODE-style cRE signature (DNAse$^+$/H3K27ac$^+$) showed enhancer activity. Having sampled across the full range of TF/DHS/H3K27Ac signal, we could project that the vast majority of enhancers genome-wide have very modest activity levels by this assay. We compared these results with a massively parallel STARR-seq design for glucocorticoid sensitive cREs and found them largely similar.  We also uncovered a set of developmentally distinct distal elements marked in progenitor cells to predict future, rather than contemporaneous, enhancer activity upon differentiation. Finally, developmentally privileged loci possessing multiple, highly regulated genes were enriched for enhancer cREs with exceptionally strong individual activity despite having only modest biochemical signatures. }}
% \centerline{}
% \centerline{}
\end{abstract}

\begin{multicols}{2}

\section*{Introduction}

The complete and accurate understanding of the relationship between the human genome and its corresponding phenotypes requires the comprehensive characterization of its compendium of functional elements. The results of the many genome-wide epigenomic and transcriptomic studies carried out over the last decade reveal a remarkable picture, in which non-coding regulatory elements constitute the bulk of such functional regions in the genome\cite{mouseENCODE2014,ENCODE2012}, with the expression of each gene (protein coding or non-coding) being controlled by the integrated output of multiple proximal and distal enhancer, insulator and silencing elements.

The genome-wide mapping and characterization of non-coding regulatory elements is thus a major goal of the field, and features prominently among the objectives of the \textbf{ENC}yclopedia \textbf{O}f \textbf{D}NA \textbf{E}lements (ENCODE) consortium\cite{ENCODE101}. However, achieving it, although greatly aided by the advent of high-throughput sequencing and epigenomic tools, is still not a simple task.

The biochemical activity associated with the function of regulatory elements results in certain biochemical signatures that can be captured by epigenomic assays. For example, active promoters in eukaryotes are classically associated with the trimethylation of lysine 4 on histone 3 (H3K4me3)\cite{Vermeulen2010}, as well as other biochemical signatures, such as DNAse hypersensitivity\cite{Thurman2012}. Active enhancer elements have been proposed to exhibit their own biochemical signature, featuring DNAse hypersensitivity, H3K27ac, H3K4me1 and occupancy by the p300 acetyltransferase\cite{Creyghton2010,RadaIglesias2011,Thurman2012} as well as by sequence-specific transcription factors.

These biochemical signatures enable the compilation of lists of candidate functional elements (cREs), but they do not on their own allow the conclusive identification of any given element as functional. While functional regulatory elements exhibit characteristic biochemical signatures, the reverse (that the presence of a biochemical signature necessarily means function) cannot be inferred straightforwardly\cite{Kellis2014}. Such inferences are further complicated by the observation that biochemical signatures are not binary but instead exist on a continuum between strong outstanding features, about which little doubt can exist, on one hand, and what is probably biochemical noise on the other. For example, it is far from clear that all transcription factor binding sites that can be reproducibly identified using ChIP-seq and related techniques are in fact functional REs\cite{Fisher2012}. Therefore, individual cREs in the lists compiled by efforts such as the ENCODE and mouseENCODE consortia\cite{ENCODE2012,mouseENCODE2014} have to be directly tested for function and characterized in detail.

The ultimate functional characterization of cREs will involve a combination of loss-of-function assays and direct assays for activity. The former have been until recently technically challenging, but are becoming more commonplace with the advent of large-scale CRISPR/Cas9-mediated mutagenesis techniques\cite{Korkmaz2016,Fulco201}. Nevertheless, most work in the field has been based on testing cREs for regulatory activity using an exogenous plasmid construct combining a cRE, a promoter and a reporter % we need an old-school reference for transfectons. 

Classically, such testing has been done by cloning individual cREs into plasmids (or other vectors) and then assaying the expression of the reporter gene (luciferase activity in cell lines or staining for LacZ activity in embryos\cite{Visel2007,Fisher2012}). Numerous developmental enhancers have been characterized following that approach\cite{Visel2007,Visel2009,May2011}, usually starting from lists of cREs derived from p300 ChIP-seq datasets. However, \hl{XXX explanation of issues with system XXX}. 

High-throughput sequencing has enabled the development of assays that go beyond the testing of individual cREs, one by one; instead very large numbers of sequences are analyzed in parallel, with the readout being based on sequencing DNA tags associated with the cRE or of the cRE itself. These are usually referred to as Massively Parallel Reporter Assays (MPRA)\cite{Inoue2015}, and a number of variations of the principle have been successfully applied to a multitude of biological problems and systems in the last few years\cite{Patwardhan2009,Kinney2010,Kwasnieski2012,Melnikov2012,Patwardhan2012,FIREWACh2014}, including the question of testing cREs for activity within the context of the ENCODE Consortium\cite{Kheradpour2013,Kwasnieski2014,Ernst2016}. However, several issues complicate the interpretations of MPRA experiments.

First, the nature of MPRA designs is such that the elements tested are very short, in the 80-250bp neighborhood. This is significantly shorter than tens of thousands of blocks of conserved noncoding sequence that can be identified in the human genome by comparative genomic analysis (Supplementary Figure \ref{FigS1}A and B). Thus to what extent complete REs are assayed and what the corresponding false negative rate is have always been an obvious concern regarding MPRAs. 

Second, given that it is very difficult to control the number of constructs going into each individual cell in transfection experiments, and that an MPRA features large numbers of different cREs being tested in the same time, there is significant potential for crosstalk between active and inactive cREs that end up in the same cell, resulting in numerous false positives. 

While the latter concern can be alleviated to an extent through the use of genome-integrated MPRA constructs \cite{Maricque2016,Inoue2016}, the short length of constructs tested remains a significant issue, and one that might be behind the low positive rates reported by MPRAs in the past\cite{Vockley2016}. 

There is therefore a major gap in the field that needs to be filled by testing a large number of individual constructs of large size (500-1000 bp) using a traditional luciferase assay, and using the resulting daya to examine the performance of biochemical signature predictions.

To this end, we tested hundreds of constructs of such length (Supplementary Figure \ref{FigS1}C and D) in several mammalian cell line systems (Figure \ref{Fig1}). \hl{XXXX FINISH XXX}

\hl{XXX SUMMARY OF RESULTS AT END OF INTRO, TO BE WRITTEN XX}

% \cite{Vockley2016}

% \cite{STARRseq2013}

\section*{Results}

\subsection*{Large-scale functional assay testing of cREs}

\end{multicols}

\begin{FPfigure}
\begin{center}
\includegraphics[width=15.5cm]{Fig1-cartoon-V3.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Outline of experimental design, cRE selection and testing}. \hl{XXXX details XXXX} } 
\label{Fig1}
\end{FPfigure}

\begin{multicols}{2}

\subsection*{XXXX}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{Fig2-DNAse-H3K27ac.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTION GOES HERE} 
\label{Fig2}
\end{figure*}

\end{multicols}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{Fig3-TAL1-GATA-heatmaps.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{Fig3}
\end{figure*}

\subsection*{XXXX}

\begin{multicols}{2}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{Fig4.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{Fig4}
\end{figure*}

\subsection*{XXXX}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{Fig5-C2C12-funcassay.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{Fig5}
\end{figure*}

\subsection*{XXXX}

\end{multicols}

\begin{FPfigure}
\begin{center}
\includegraphics[width=17.5cm]{Fig6-G1E-funcassay.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. caption.}
\label{Fig6}
\end{FPfigure}

\begin{multicols}{2}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=16cm]{Fig7-predictivity-V3.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{Fig7}
\end{figure*}

\section*{Discussion}

Promoter-enhancer specificity \cite{Zabidi2015}

integration in genome \cite{Maricque2016,Inoue2016,FIREWACh2014}

\section*{Methods}

Except where otherwise stated, all analyses were performed using custom-written python scripts. 

\subsection*{ChIP-seq experiments}

Chromatin from 2 $\times$ 10$^7$ C2C12 Myoblast or Myocyte nuclei was fragmented using a Misonix probe tip sonicator and Immunoprecipitated by robotic ChIP pipeline. \cite{Gasper2016} The enriched fragments were built into adapter ligated and single PCR amplified libraries then sequenced on an HiSeq2500 to $\geq$15m reads (Illumina). 

Chromatin immunoprecipitation in A549 cells was performed as previously described (Reddy et al. 2009) using 2 $\times$ 10$^7$ A549 cells per replicate. Cells were sonicated using a Bioruptor XL (Diagenode) on the high setting until the resulting chromatin was fragmented to a median fragment size of 250 nt as assayed by agarose gel electrophoresis. ChIP was performed using the 5 $\mu$g rabbit polyclonal GR antibody (Santa Cruz Biotechnology sc-1003), and 200 $\mu$l of magnetic sheep anti-rabbit beads (Life Technologies M-280). After reversal
of formaldehyde crosslinks at $65\,^{\circ}\mathrm{C}$ overnight, DNA was purified using MinElute DNA purification columns (QIAGEN). Illumina sequencing libraries were then generated using the Apollo 324 liquid handling platform according to manufacturer's specifications (Wafergen). 

\subsection*{DNAseHS experiments}

NEED INFO FROM ALI/RICARDO RAMIREZ

\subsection*{Cloning and DNA purification}

Candidate regulatory elements were either PCR amplified from female balb/c purified mouse genomic DNA (Switchgear Genomics) or synthesized (Genscript). The DNA was cloned into a reporter vector 5$^\prime$ of a custom TK promoter (SwitchGear Genomics) driving an high turnover sequence optimized luciferase reporter gene. The DNA was purified using miniprep kits (Quiagen) and standardized to 30 ng/$\mu$L using fluorometry measurements (Qubit HS DNA).

\subsection*{C2C12/10T1/2 Cell culture}

C2C12 myoblasts %and 10T1/2 fibroblasts 
were maintained and seeded for transfection in 20$\%$ FBS supplemented DMEM medium. Once at $>$80$\%$ confluency the myocyte destined C2C12 cells were differentiated %, or mock differentiated for 10T1/2s 
using 2$\%$ horse serum and 1 $\mu$M insulin in DMEM.

\subsection*{C2C12/10T1/2 Functional assays}
C2C12 Myoblast destined 96-well delta surface plates (NUNC) were seeded in quadruplicate 12 hours before transfection at a concentration of 2500 cells/well while C2C12 Myocyte destined plated received 3500 cells/well. %10T1/2 cells are seeded at 5000 cells per well. 
50ng of construct DNA per replicate were transfected in a robotic transfection assay using Lipofectamine LTX after a 5 minute incubation with a 1:16 dilution with the PLUS reagent(Thermo Fisher). 

The undifferentiated C2C12 myoblast plates are lysed using a SteadyGlo kit and measured on a plate luminometer 24 hour post transfection. The C2C12 myocyte destined plates %and mock differentiated 10T1/2 mesodermal precursor 
were swapped into differentiation medium 12-16 hours post transfection using a robotic handling step ($>$80$\%$ confluency) and measured 24 hours later.

\subsection*{Functional Assay Data processing}
Luminometer data is ratioed to the basal promoter vector (relative assay activity) and enhancers were discriminated from non enhancers using Z-score analysis that compared the population of test element technical replicate values to a set of negative control vectors.

\subsection*{Genomic coordinate conversion}

The regions to be tested using functional assays were designed based on the \verb|mm8|; \verb|mm9| and \verb|hg19| versions of the mouse and human genomes, respectively. Conversion to \verb|mm10| and \verb|hg20| was carried out using the \verb|liftOver| tool from the UCSC Genome Browser Utilities.

\subsection*{STARR-seq experiments}

The STARR-seq experiments previously published by Vockley et al. \cite{Vockley2016} were used in this study. 

\subsection*{ChIP-seq data processing and analysis}

ChIP-seq reads were trimmed down to 36 bp in length and mapped against the \verb|hg20| (for human samples; the male or female version depending on the sex of the cell line the sample originated from) and \verb|mm10| (for mouse samples) using Bowtie \cite{Bowtie} (version 1.0.1) with the following settings: \verb|-v 2| \verb||-k 2| \verb|-m 1| \verb|--best| \verb|--strata|. DNAse-seq reads were processed similarly except that they were trimmed down to 20bp for A549 samples and 36bp for C2C12 cells (due to differences in the experimental protocol used to generate the data).

Peak calling was carried out as follows. For DNAse and H3K27ac datasets, MACS2 \cite{Feng2012} (version 2.1.0) was run on individual replicates and on pseudoreplicates (generated by randomly splitting the pooled set of reads for both replicates into two) with relaxed settings (\verb|--to-large| \verb|-p 1e-1|). For H3K27ac control datasets were subjected to the same treatment (no background/control is available for DNAse data) The top 100,000 peaks from each replicate or pseudoreplicate (ranked by $q$-value) were then used as input into IDR \cite{Li2011}. The number of peaks above a given IDR threshold called as reproducible between true replicates ($N_t$) and between pseudoreplicates  ($N_p$) were recorded. Peak calling was then carried out on the pooled set of reads and the top $max(N_t,N_p)$ peaks were chosen as the final set of reproducible peaks. For point-source \cite{Pepke2009} datasets (transcription factors), peak calling was carried out following the same procedure but using SPP \cite{Kharchenko2008} (version 1.10.1), using the top 300K peaks as input to IDR.

Normalized enrichment scores were calculated for each region tested for functionality as follows. 

\subsection*{STARR-seq data processing and analysis}

% Estimation of Regulatory Activity in STARR-Seq Assayed Sites
% BAC STARR-seq output libraries were sequenced on an Illumina MiSeq Instrument using paired-end 25 bp reads. Reads were aligned to the hg19 reference genome sequence for the target BACs using Bowtie (Langmead and Salzberg 2012), paired end reads were converted to corresponding fragments, and the number of reads per fragment was counted for each replicate. The number of reads in each experiment was then normalized to the median of the number of reads per fragment divided by the geometric mean read count of that fragment. To estimate the significance of differences between EtOH and DEX treatment, a Wilcoxon signed-rank test was performed using a sliding 1 bp window across the target BACs. For each test, the normalized read counts for the fragments overlapping the window were compared between DEX and EtOH treatment conditions, and a p value was reported.

\subsection*{Conservation analysis}



\section*{Acknowledgments}

High throughput sequencing for the C2C12 ChIP-Seq samples was performed by Igor Antoshechkin at the Millard and Muriel Jacobs Genetics and Genomics Laboratory.

This material is based upon work supported by the National Science Foundation under Grant No. CNS-0521433.

\begin{thebibliography}{100}

\input{references}

\end{thebibliography}

\end{multicols}

\setcounter{table}{0}
\renewcommand{\tablename}{Supplementary Table}
\setcounter{figure}{0}
\renewcommand{\figurename}{Supplementary Figure}

\clearpage

% \section*{Supplementary Methods}

% \section*{Supplementary Figures}

\begin{FPfigure}
\begin{center}
\includegraphics[width=15cm]{FigS1-region-CNE-length.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf The length of cREs tested in this study and of conserved noncoding elements in the genome}. (A) The length of conserved noncoding regions in the human genome. The \texttt{phastCons100way} conservation track for the \texttt{hg20} version of the human genome was downloaded from the UCSC Genome Browser. Blocks of conservation, in which all nucleotides have \texttt{phastCons} scores higher than the indicated minimum (\texttt{phCons}), were identified, and then merged into larger regions if the length of the gaps between them was smaller than the indicated \texttt{maxGap} parameter. The distribution of the lengths of the resulting sets of regions was plotted. This approach captures the properties of enhancer elements observed in the genome, which often consist of multiple blocks of highly conserved sequences separated by gaps of less conserved sequences, resulting in an enhancer element of up to a few hundred base pairs in length or more. (B) Such an example is shown for the \textit{Acta1} gene in mouse. (C) Distribution of functional assay construct lengths tested in this study in C2C12 cells. (D) Distribution of functional assay construct lengths tested in this study in G1E cells.} 
\label{FigS1}
\end{FPfigure}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{FigS2-G1E-activity-vs-RPM-conservation.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{FigS2}
\end{figure*}


\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{FigS3-G1E-activity-vs-RPM-TF-binding.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{FigS3}
\end{figure*}

\end{document}