\documentclass[10pt]{article}
\usepackage[hmargin=1.5cm,top=2cm,bottom=2cm]{geometry}
\usepackage{multicol}
\setlength\columnsep{15pt}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{array}
\usepackage{booktabs}
\usepackage{tabularx}
\usepackage[auth-sc]{authblk}
\usepackage{longtable}
\usepackage{multirow}
\usepackage{hyperref}
\usepackage{enumerate}
\usepackage[labelfont=bf]{caption}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage{mdframed}
\usepackage{graphics}
\usepackage{multirow}
\usepackage{rotating}
\usepackage{array}
\usepackage{lscape}
\usepackage{caption}
\usepackage{breakurl}
\usepackage{todonotes}
\usepackage{hanging}
\usepackage{pagecolor}
\usepackage[final]{pdfpages}
\usepackage[leftFloats,CaptionBefore]{fltpage}
\usepackage[numbers,super,sort&compress]{natbib}
\setlength{\bibsep}{3pt}
\usepackage{abstract}
\usepackage{enumitem}
\usepackage{soul}
\usepackage{titlesec}
\titleformat{\section}[block]{\large\bfseries\filcenter}{\thesection.}{0.4em}{}
\titleformat{\subsection}[block]{\normalsize\sc\bfseries\filcenter}{\thesubsection.}{0.4em}{}
\setcounter{secnumdepth}{5}
% \usepackage{fancyhdr}
% \pagestyle{fancy}
% \renewcommand{\sectionmark}[1]{
% \markright{\thesection\ #1}}
% \fancyhf{} % delete current header and footer
% \fancyhead[CO]{Third-generation sequencing and functional genomics}
% \fancyhead[CE]{Georgi K. Marinov}
% \fancyhead[RO,LE]{\thepage}
% \fancyhead[LO]{\bfseries\rightmark}
% \fancyhead[RE]{\bfseries\leftmark}
% \renewcommand{\headrulewidth}{0pt}
% \renewcommand{\footrulewidth}{0pt}
% \addtolength{\headheight}{0.5pt} % space for the rule
% \fancypagestyle{plain}{%
% }

\newcommand{\filllastline}[1]{
\setlength\leftskip{0pt}
\setlength\rightskip{0pt}
\setlength\parfillskip{0pt}
#1}

\makeatletter
\def\@biblabel#1{\@ifnotempty{#1}{#1.}}
\makeatother

\newenvironment{Figure}
{\par\medskip\noindent\minipage{\linewidth}}
{\endminipage\par\medskip}

\title{\bf Enhancer activity and biochemical signatures in ENCODE Encyclopedia Candidate Regulatory Elements}
\renewcommand\Authfont{\scshape\normalsize}
%\author[*]{ALL OTHER AUTHORS I'VE MISSED}
\author[1]{Gilberto DeSalvo}
\author[6]{Georgi K. Marinov}
\author[2]{Christopher Partridge}
\author[4,7]{Christopher M. Vockley}
\author[3]{Nergiz Dogan}
\author[8,9]{Ricardo Ramirez}
%\author[1]{Gordon Kwan}
\author[4,5]{Timothy E. Reddy}
\author[8,9]{Ali Mortazavi}
\author[3]{Ross C. Hardison}
\author[2]{Richard M. Myers}
\author[1]{Barbara J. Wold}
\renewcommand\Affilfont{\itshape\normalsize}
%\affil[*]{other affiliations}
\affil[1]{Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, California, USA.}
\affil[2]{HudsonAlpha Institute for Biotechnology, 601 Genome Way, Huntsville, AL 35806, USA}
\affil[3]{Dept. of Biochemistry and Molecular Biology, Penn State University, 304 Wartik Laboratory, University Park, PA 16802, USA}
\affil[4]{Center for Genomic \& Computational Biology, Duke University, Durham, NC 27708, USA}
\affil[5]{Department of Biostatistics \& Bioinformatics, Duke University, Durham, NC 27708, USA.}
\affil[6]{Independent researcher; Zahari Zograf 24 V 8, Pleven, Bulgaria}
\affil[7]{Department of Cell Biology, Duke University, Durham, NC 27708, USA}
\affil[8]{Department of Developmental and Cell Biology, University of California Irvine, Irvine, CA 92697-2300, USA}
\affil[9]{Center for Complex Biological Systems, University of California Irvine, Irvine, CA 92697-2280, USA}
\date{}

\begin{document}
\maketitle

% \let\thefootnote\relax\footnotetext{Corresponding author:
% Georgi K. Marinov. Division of Biology and Biological Engineering, California Institute of Technology. Pasadena, CA 91125.}
% \centerline{}
% \centerline{}
\begin{abstract}
\noindent {\normalsize \textbf{The comprehensive characterization of functional genomic elements in the human genome is a task of fundamental importance for achieving a complete understanding of how the genome functions. Large-scale efforts such as the ENCODE Consortium Project have generated large numbers of datasets profiling biochemical activity on a genome-wide scale in a wide diversity of cell lines and tissues aiming to resolve that challenge; these datasets and their integrated analysis are invaluable tools for identifying candidate regulatory elements (cREs). However, they alone do not provide conclusive evidence that any individual cRE is functional. In addition, the relationship between biochemical signatures and functional activity is unclear. Previous efforts have addressed these questions using various massively parallel reporter assay designs or functional testing of individual elements. However, because of a cominbation of insufficient resolution (due to using too small or too large constructs) and confouding experimental design factors, a number of important questions remains unresolved. To this end, we tested hundreds of individual constructs, each of which is large enough to capture a complete cREs, using luciferase reporter assays in several different mammalian systems: muscle differentiation in mouse, erythroid differentiation in mouse, and the human erythroid K562 % and hepatocyte HepG2 immortalized cell lines
immortalized cell line. We complement these datasets with a massively parallel STARR-seq-based characterization of glucocorticoid receptor (GR) response in human A549 cells. We use the resulting data to evaluate the functional predictivity of various biochemical signatures. We find that approximately 50\% of sites exhibiting DNAse hypersensitivity and H3K27ac are active regulatory elements in the K562 and differentiated erythroid and myocyte cells; in contrast, in undifferentiated myoblasts, only 25\% of sites exhibiting biochemical activity are functional. Approximately 28\% of such regions tested using STARR-seq in A549 cells appear to be active enhancers. Finally, we note that the majority of functional REs are characterized by modest biochemical signatures, especially near loci that are differentially regulated during development.}}
% \centerline{}
% \centerline{}
\end{abstract}

\begin{multicols}{2}

\section*{Introduction}

The complete and accurate understanding of the relationship between the human genome and its corresponding phenotypes requires the comprehensive characterization of its compendium of functional elements. The results of the many genome-wide epigenomic and transcriptomic studies carried out over the last decade reveal a remarkable picture, in which non-coding regulatory elements constitute the bulk of such functional regions in the genome\cite{mouseENCODE2014,ENCODE2012}, with the expression of each gene (protein coding or non-coding) being controlled by the integrated output of multiple proximal and distal enhancer, insulator and silencing elements.

The genome-wide mapping and characterization of non-coding regulatory elements is thus a major goal of the field, and features prominently among the objectives of the \textbf{ENC}yclopedia \textbf{O}f \textbf{D}NA \textbf{E}lements (ENCODE) consortium\cite{ENCODE101}. However, achieving it, although greatly aided by the advent of high-throughput sequencing and epigenomic tools, is still not a simple task.

The biochemical activity associated with the function of regulatory elements results in certain biochemical signatures that can be captured by epigenomic assays. For example, active promoters in eukaryotes are classically associated with the trimethylation of lysine 4 on histone 3 (H3K4me3)\cite{Vermeulen2010}, as well as other biochemical signatures, such as DNAse hypersensitivity\cite{Thurman2012}. Active enhancer elements have been proposed to exhibit their own biochemical signature, featuring DNAse hypersensitivity, H3K27ac, H3K4me1 and occupancy by the p300 acetyltransferase\cite{Creyghton2010,RadaIglesias2011,Thurman2012} as well as by sequence-specific transcription factors.

These biochemical signatures enable the compilation of lists of candidate functional elements (cREs), but they do not on their own allow the conclusive identification of any given element as functional. While functional regulatory elements exhibit characteristic biochemical signatures, the reverse (that the presence of a biochemical signature necessarily means function) cannot be inferred straightforwardly\cite{Kellis2014}. Such inferences are further complicated by the observation that biochemical signatures are not binary but instead exist on a continuum between strong outstanding features, about which little doubt can exist, on one hand, and what is probably biochemical noise on the other. For example, it is far from clear that all transcription factor binding sites that can be reproducibly identified using ChIP-seq and related techniques are in fact functional REs\cite{Fisher2012}. Therefore, individual cREs in the lists compiled by efforts such as the ENCODE and mouseENCODE consortia\cite{ENCODE2012,mouseENCODE2014} have to be directly tested for function and characterized in detail.

The ultimate functional characterization of cREs will involve a combination of loss-of-function assays and direct assays for activity. The former have been until recently technically challenging, but are becoming more commonplace with the advent of large-scale CRISPR/Cas9-mediated mutagenesis techniques\cite{Korkmaz2016,Fulco201}. Nevertheless, most work in the field has been based on testing cREs for regulatory activity using an exogenous plasmid construct combining a cRE, a promoter and a reporter % we need an old-school reference for transfectons. 

Classically, such testing has been done by cloning individual cREs into plasmids (or other vectors) and then assaying the expression of the reporter gene (luciferase activity in cell lines or staining for LacZ activity in embryos\cite{Visel2007,Fisher2012}). Numerous developmental enhancers have been characterized following that approach\cite{Visel2007,Visel2009,May2011}, usually starting from lists of cREs derived from p300 ChIP-seq datasets. However, \hl{XXX explanation of issues with system XXX}. 

High-throughput sequencing has enabled the development of assays that go beyond the testing of individual cREs, one by one; instead very large numbers of sequences are analyzed in parallel, with the readout being based on sequencing DNA tags associated with the cRE or of the cRE itself. These are usually referred to as Massively Parallel Reporter Assays (MPRA)\cite{Inoue2015}, and a number of variations of the principle have been successfully applied to a multitude of biological problems and systems in the last few years\cite{Patwardhan2009,Kinney2010,Kwasnieski2012,Melnikov2012,Patwardhan2012,FIREWACh2014}, including the question of testing cREs for activity within the context of the ENCODE Consortium\cite{Kheradpour2013,Kwasnieski2014,Ernst2016}. However, several issues complicate the interpretations of MPRA experiments.

First, the nature of MPRA designs is such that the elements tested are very short, in the 80-250bp neighborhood. This is significantly shorter than tens of thousands of blocks of conserved noncoding sequence that can be identified in the human genome by comparative genomic analysis (Supplementary Figure \ref{FigS1}A and B). Thus to what extent complete REs are assayed and what the corresponding false negative rate is have always been an obvious concern regarding MPRAs. 

Second, given that it is very difficult to control the number of constructs going into each individual cell in transfection experiments, and that an MPRA features large numbers of different cREs being tested in the same time, there is significant potential for crosstalk between active and inactive cREs that end up in the same cell, resulting in numerous false positives. 

While the latter concern can be alleviated to an extent through the use of genome-integrated MPRA constructs \cite{Maricque2016,Inoue2016}, the short length of constructs tested remains a significant issue, and one that might be behind the low positive rates reported by MPRAs in the past\cite{Vockley2016}. 

There is therefore a major gap in the field that needs to be filled by testing a large number of individual constructs of large size (500-1000 bp) using a traditional luciferase assay, and using the resulting daya to examine the performance of biochemical signature predictions.

To this end, we tested hundreds of constructs of such length (Supplementary Figure \ref{FigS1}C and D) in several mammalian cell line systems (Figure \ref{Fig1}). \hl{XXXX FINISH XXX}

\hl{XXX SUMMARY OF RESULTS AT END OF INTRO, TO BE WRITTEN XX}

% \cite{Vockley2016}

% \cite{STARRseq2013}

\section*{Results}

\end{multicols}

\begin{FPfigure}
\begin{center}
\includegraphics[width=15.5cm]{Fig1-cartoon-V3.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Outline of experimental design, cRE selection and testing}. \hl{XXXX details XXXX} } 
\label{Fig1}
\end{FPfigure}

\begin{multicols}{2}

\subsection*{XXXX}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{Fig2-DNAse-H3K27ac.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTION GOES HERE} 
\label{Fig2}
\end{figure*}

\end{multicols}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{Fig3-TAL1-GATA-heatmaps.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{Fig3}
\end{figure*}

\subsection*{XXXX}

\begin{multicols}{2}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{Fig4.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{Fig4}
\end{figure*}

\subsection*{XXXX}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{Fig5-C2C12-funcassay.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{Fig5}
\end{figure*}

\subsection*{XXXX}

\end{multicols}

\begin{FPfigure}
\begin{center}
\includegraphics[width=17.5cm]{Fig6-G1E-funcassay.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. caption.}
\label{Fig6}
\end{FPfigure}

\begin{multicols}{2}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=16cm]{Fig7-predictivity-V3.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{Fig7}
\end{figure*}

\section*{Discussion}

Promoter-enhancer specificity \cite{Zabidi2015}

integration in genome \cite{Maricque2016,Inoue2016,FIREWACh2014}

\section*{Methods}

Except where otherwise stated, all analyses were performed using custom-written python scripts. 

\subsection*{ChIP-seq experiments}

Chromatin from 2 $\times$ 10$^7$ C2C12 Myoblast or Myocyte nuclei was fragmented using a Misonix probe tip sonicator and Immunoprecipitated by robotic ChIP pipeline. \cite{Gasper2016} The enriched fragments were built into adapter ligated and single PCR amplified libraries then sequenced on an HiSeq2500 to $\geq$15m reads (Illumina). 

Chromatin immunoprecipitation in A549 cells was performed as previously described (Reddy et al. 2009) using 2 $\times$ 10$^7$ A549 cells per replicate. Cells were sonicated using a Bioruptor XL (Diagenode) on the high setting until the resulting chromatin was fragmented to a median fragment size of 250 nt as assayed by agarose gel electrophoresis. ChIP was performed using the 5 $\mu$g rabbit polyclonal GR antibody (Santa Cruz Biotechnology sc-1003), and 200 $\mu$l of magnetic sheep anti-rabbit beads (Life Technologies M-280). After reversal
of formaldehyde crosslinks at $65\,^{\circ}\mathrm{C}$ overnight, DNA was purified using MinElute DNA purification columns (QIAGEN). Illumina sequencing libraries were then generated using the Apollo 324 liquid handling platform according to manufacturer's specifications (Wafergen). 

\subsection*{DNAseHS experiments}

NEED INFO FROM ALI/RICARDO RAMIREZ

\subsection*{Cloning and DNA purification}

Candidate regulatory elements were either PCR amplified from female balb/c purified mouse genomic DNA (Switchgear Genomics) or synthesized (Genscript). The DNA was cloned into a reporter vector 5$^\prime$ of a custom TK promoter (SwitchGear Genomics) driving an high turnover sequence optimized luciferase reporter gene. The DNA was purified using miniprep kits (Quiagen) and standardized to 30 ng/$\mu$L using fluorometry measurements (Qubit HS DNA).

\subsection*{C2C12/10T1/2 Cell culture}

C2C12 myoblasts %and 10T1/2 fibroblasts 
were maintained and seeded for transfection in 20$\%$ FBS supplemented DMEM medium. Once at $>$80$\%$ confluency the myocyte destined C2C12 cells were differentiated %, or mock differentiated for 10T1/2s 
using 2$\%$ horse serum and 1 $\mu$M insulin in DMEM.

\subsection*{C2C12/10T1/2 Functional assays}
C2C12 Myoblast destined 96-well delta surface plates (NUNC) were seeded in quadruplicate 12 hours before transfection at a concentration of 2500 cells/well while C2C12 Myocyte destined plated received 3500 cells/well. %10T1/2 cells are seeded at 5000 cells per well. 
50ng of construct DNA per replicate were transfected in a robotic transfection assay using Lipofectamine LTX after a 5 minute incubation with a 1:16 dilution with the PLUS reagent(Thermo Fisher). 

The undifferentiated C2C12 myoblast plates are lysed using a SteadyGlo kit and measured on a plate luminometer 24 hour post transfection. The C2C12 myocyte destined plates %and mock differentiated 10T1/2 mesodermal precursor 
were swapped into differentiation medium 12-16 hours post transfection using a robotic handling step ($>$80$\%$ confluency) and measured 24 hours later.

\subsection*{Functional Assay Data processing}
Luminometer data is ratioed to the basal promoter vector (relative assay activity) and enhancers were discriminated from non enhancers using Z-score analysis that compared the population of test element technical replicate values to a set of negative control vectors.

\subsection*{Genomic coordinate conversion}

The regions to be tested using functional assays were designed based on the \verb|mm8|; \verb|mm9| and \verb|hg19| versions of the mouse and human genomes, respectively. Conversion to \verb|mm10| and \verb|hg20| was carried out using the \verb|liftOver| tool from the UCSC Genome Browser Utilities.

\subsection*{STARR-seq experiments}

The STARR-seq experiments previously published by Vockley et al. \cite{Vockley2016} were used in this study. 

\subsection*{ChIP-seq data processing and analysis}

ChIP-seq reads were trimmed down to 36 bp in length and mapped against the \verb|hg20| (for human samples; the male or female version depending on the sex of the cell line the sample originated from) and \verb|mm10| (for mouse samples) using Bowtie \cite{Bowtie} (version 1.0.1) with the following settings: \verb|-v 2| \verb||-k 2| \verb|-m 1| \verb|--best| \verb|--strata|. DNAse-seq reads were processed similarly except that they were trimmed down to 20bp for A549 samples and 36bp for C2C12 cells (due to differences in the experimental protocol used to generate the data).

Peak calling was carried out as follows. For DNAse and H3K27ac datasets, MACS2 \cite{Feng2012} (version 2.1.0) was run on individual replicates and on pseudoreplicates (generated by randomly splitting the pooled set of reads for both replicates into two) with relaxed settings (\verb|--to-large| \verb|-p 1e-1|). For H3K27ac control datasets were subjected to the same treatment (no background/control is available for DNAse data) The top 100,000 peaks from each replicate or pseudoreplicate (ranked by $q$-value) were then used as input into IDR \cite{Li2011}. The number of peaks above a given IDR threshold called as reproducible between true replicates ($N_t$) and between pseudoreplicates  ($N_p$) were recorded. Peak calling was then carried out on the pooled set of reads and the top $max(N_t,N_p)$ peaks were chosen as the final set of reproducible peaks. For point-source \cite{Pepke2009} datasets (transcription factors), peak calling was carried out following the same procedure but using SPP \cite{Kharchenko2008} (version 1.10.1), using the top 300K peaks as input to IDR.

Normalized enrichment scores were calculated for each region tested for functionality as follows. 

\subsection*{STARR-seq data processing and analysis}

% Estimation of Regulatory Activity in STARR-Seq Assayed Sites
% BAC STARR-seq output libraries were sequenced on an Illumina MiSeq Instrument using paired-end 25 bp reads. Reads were aligned to the hg19 reference genome sequence for the target BACs using Bowtie (Langmead and Salzberg 2012), paired end reads were converted to corresponding fragments, and the number of reads per fragment was counted for each replicate. The number of reads in each experiment was then normalized to the median of the number of reads per fragment divided by the geometric mean read count of that fragment. To estimate the significance of differences between EtOH and DEX treatment, a Wilcoxon signed-rank test was performed using a sliding 1 bp window across the target BACs. For each test, the normalized read counts for the fragments overlapping the window were compared between DEX and EtOH treatment conditions, and a p value was reported.

\subsection*{Conservation analysis}



\section*{Acknowledgments}

High throughput sequencing for the C2C12 ChIP-Seq samples was performed by Igor Antoshechkin at the Millard and Muriel Jacobs Genetics and Genomics Laboratory.

This material is based upon work supported by the National Science Foundation under Grant No. CNS-0521433.

\begin{thebibliography}{100}

\input{references}

\end{thebibliography}

\end{multicols}

\setcounter{table}{0}
\renewcommand{\tablename}{Supplementary Table}
\setcounter{figure}{0}
\renewcommand{\figurename}{Supplementary Figure}

\clearpage

\section*{Supplementary Methods}

\section*{Supplementary Figures}

\begin{FPfigure}
\begin{center}
\includegraphics[width=15cm]{FigS1-region-CNE-length.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf The length of cREs tested in this study and of conserved noncoding elements in the genome}. (A) The length of conserved noncoding regions in the human genome. The \texttt{phastCons100way} conservation track for the \texttt{hg20} version of the human genome was downloaded from the UCSC Genome Browser. Blocks of conservation, in which all nucleotides have \texttt{phastCons} scores higher than the indicated minimum (\texttt{phCons}), were identified, and then merged into larger regions if the length of the gaps between them was smaller than the indicated \texttt{maxGap} parameter. The distribution of the lengths of the resulting sets of regions was plotted. This approach captures the properties of enhancer elements observed in the genome, which often consist of multiple blocks of highly conserved sequences separated by gaps of less conserved sequences, resulting in an enhancer element of up to a few hundred base pairs in length or more. (B) Such an example is shown for the \textit{Acta1} gene in mouse. (C) Distribution of functional assay construct lengths tested in this study in C2C12 cells. (D) Distribution of functional assay construct lengths tested in this study in G1E cells.} 
\label{FigS1}
\end{FPfigure}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{FigS2-G1E-activity-vs-RPM-conservation.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{FigS2}
\end{figure*}


\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{FigS3-G1E-activity-vs-RPM-TF-binding.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{FigS3}
\end{figure*}

\end{document}