\documentclass[10pt]{article}
\usepackage[hmargin=1.5cm,top=2cm,bottom=2cm]{geometry}
\usepackage{multicol}
\setlength\columnsep{15pt}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{array}
\usepackage{booktabs}
\usepackage{tabularx}
\usepackage[auth-sc]{authblk}
\usepackage{longtable}
\usepackage{multirow}
\usepackage{hyperref}
\usepackage{enumerate}
\usepackage[labelfont=bf]{caption}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage{mdframed}
\usepackage{graphics}
\usepackage{multirow}
\usepackage{rotating}
\usepackage{array}
\usepackage{lscape}
\usepackage{caption}
\usepackage{breakurl}
\usepackage{todonotes}
\usepackage{hanging}
\usepackage{pagecolor}
\usepackage[final]{pdfpages}
\usepackage[leftFloats,CaptionBefore]{fltpage}
\usepackage[numbers,super,sort&compress]{natbib}
\setlength{\bibsep}{3pt}
\usepackage{abstract}
\usepackage{enumitem}
\usepackage{soul}
\usepackage{titlesec}
\titleformat{\section}[block]{\large\bfseries\filcenter}{\thesection.}{0.4em}{}
\titleformat{\subsection}[block]{\normalsize\sc\bfseries\filcenter}{\thesubsection.}{0.4em}{}
\setcounter{secnumdepth}{5}
% \usepackage{fancyhdr}
% \pagestyle{fancy}
% \renewcommand{\sectionmark}[1]{
% \markright{\thesection\ #1}}
% \fancyhf{} % delete current header and footer
% \fancyhead[CO]{Third-generation sequencing and functional genomics}
% \fancyhead[CE]{Georgi K. Marinov}
% \fancyhead[RO,LE]{\thepage}
% \fancyhead[LO]{\bfseries\rightmark}
% \fancyhead[RE]{\bfseries\leftmark}
% \renewcommand{\headrulewidth}{0pt}
% \renewcommand{\footrulewidth}{0pt}
% \addtolength{\headheight}{0.5pt} % space for the rule
% \fancypagestyle{plain}{%
% }

\newcommand{\filllastline}[1]{
\setlength\leftskip{0pt}
\setlength\rightskip{0pt}
\setlength\parfillskip{0pt}
#1}

\makeatletter
\def\@biblabel#1{\@ifnotempty{#1}{#1.}}
\makeatother

\newenvironment{Figure}
{\par\medskip\noindent\minipage{\linewidth}}
{\endminipage\par\medskip}

\title{\bf Enhancer activity and biochemical signatures in ENCODE Encyclopedia Candidate Regulatory Elements}
\renewcommand\Authfont{\scshape\normalsize}
%\author[*]{ALL OTHER AUTHORS I'VE MISSED}
\author[1]{Gilberto DeSalvo}
\author[6]{Georgi K. Marinov}
\author[2]{Christopher Partridge}
\author[4,7]{Christopher M. Vockley}
\author[3]{Nergiz Dogan}
\author[8,9]{Ricardo Ramirez}
%\author[1]{Gordon Kwan}
\author[4,5]{Timothy E. Reddy}
\author[8,9]{Ali Mortazavi}
\author[3]{Ross C. Hardison}
\author[2]{Richard M. Myers}
\author[1]{Barbara J. Wold}
\renewcommand\Affilfont{\itshape\normalsize}
%\affil[*]{other affiliations}
\affil[1]{Division of Biology and Biological Engineering, California Institute of Technology, Pasadena, California, USA.}
\affil[2]{HudsonAlpha Institute for Biotechnology, 601 Genome Way, Huntsville, AL 35806, USA}
\affil[3]{Dept. of Biochemistry and Molecular Biology, Penn State University, 304 Wartik Laboratory, University Park, PA 16802, USA}
\affil[4]{Center for Genomic \& Computational Biology, Duke University, Durham, NC 27708, USA}
\affil[5]{Department of Biostatistics \& Bioinformatics, Duke University, Durham, NC 27708, USA.}
\affil[6]{Independent researcher; Zahari Zograf 24 V 8, Pleven, Bulgaria}
\affil[7]{Department of Cell Biology, Duke University, Durham, NC 27708, USA}
\affil[8]{Department of Developmental and Cell Biology, University of California Irvine, Irvine, CA 92697-2300, USA}
\affil[9]{Center for Complex Biological Systems, University of California Irvine, Irvine, CA 92697-2280, USA}
\date{}

\begin{document}
\maketitle

% \let\thefootnote\relax\footnotetext{Corresponding author:
% Georgi K. Marinov. Division of Biology and Biological Engineering, California Institute of Technology. Pasadena, CA 91125.}
% \centerline{}
% \centerline{}
\begin{abstract}
\noindent {\normalsize \textbf{An aspiration for functional genomics is to identify all cis-acting regulatory elements in the genome and define their activities.  Genome-wide transcription factor occupancy and chromatin state profiles are widely used to identify candidate regulatory elements (cREs), although accurately predicting their varied regulatory activities and cell type specificities has proved surprisingly difficult. Multiple large-scale efforts, including ENCODE and Epigenomic Roadmap projects, have generated rich standardized biochemical data from large numbers of diverse cell lines, tissues and developmental stages for human and mouse. These data collections are driving attempts to catalog the global cRE repertoire, including a new ENCODE Encyclopedia of Elements V1.0. In light of these expanded resources, we aimed to uniformly analyze a collection of pertinent enhancer tests for hundreds of individual constructs in five cell types, including both progenitor cell states and mature differentiated cell systems.  The elements were drawn from both TSS-proximal and far-distal locations; were designed to include the full-length cREs; and were selected using criteria that ranged from simple single-TF occupancy to integrative machine learning models. A few shared qualitative and quantitative themes emerged: roughly 50\% of elements occupied by at least one TF and possessing the basic ENCODE-style cRE signature (DNAse$^+$/H3K27ac$^+$) showed enhancer activity. Having sampled across the full range of TF/DHS/H3K27Ac signal, we could project that the vast majority of enhancers genome-wide have very modest activity levels by this assay. We compared these results with a massively parallel STARR-seq design for glucocorticoid sensitive cREs and found them largely similar.  We also uncovered a set of developmentally distinct distal elements marked in progenitor cells to predict future, rather than contemporaneous, enhancer activity upon differentiation. Finally, developmentally privileged loci possessing multiple, highly regulated genes were enriched for enhancer cREs with exceptionally strong individual activity despite having only modest biochemical signatures. }}
% \centerline{}
% \centerline{}
\end{abstract}

\begin{multicols}{2}

\section*{Introduction}

The complete and accurate understanding of the relationship between the human genome and its corresponding phenotypes requires the comprehensive characterization of its compendium of functional elements. The results of the many genome-wide epigenomic and transcriptomic studies carried out over the last decade reveal a remarkable picture, in which non-coding regulatory elements constitute the bulk of such functional regions in the genome\cite{mouseENCODE2014,ENCODE2012}, with the expression of each gene (protein coding or non-coding) being controlled by the integrated output of multiple proximal and distal enhancer, insulator and silencing elements.

The genome-wide mapping and characterization of non-coding regulatory elements is thus a major goal of the field, and features prominently among the objectives of the \textbf{ENC}yclopedia \textbf{O}f \textbf{D}NA \textbf{E}lements (ENCODE) consortium\cite{ENCODE101}. However, achieving it, although greatly aided by the advent of high-throughput sequencing and epigenomic tools, is still not a simple task.

The biochemical activity associated with the function of regulatory elements results in certain biochemical signatures that can be captured by epigenomic assays. For example, active promoters in eukaryotes are classically associated with the trimethylation of lysine 4 on histone 3 (H3K4me3)\cite{Vermeulen2010}, as well as other biochemical signatures, such as DNAse hypersensitivity\cite{Thurman2012}. Active enhancer elements have been proposed to exhibit their own biochemical signature, featuring DNAse hypersensitivity, H3K27ac, H3K4me1 and occupancy by the p300 acetyltransferase\cite{Creyghton2010,RadaIglesias2011,Thurman2012} as well as by sequence-specific transcription factors.

These biochemical signatures enable the compilation of lists of candidate functional elements (cREs), but they do not on their own allow the conclusive identification of any given element as functional. While functional regulatory elements exhibit characteristic biochemical signatures, the reverse (that the presence of a biochemical signature necessarily means function) cannot be inferred straightforwardly\cite{Kellis2014}. Such inferences are further complicated by the observation that biochemical signatures are not binary but instead exist on a continuum between strong outstanding features, about which little doubt can exist, on one hand, and what is probably biochemical noise on the other. For example, it is far from clear that all transcription factor binding sites that can be reproducibly identified using ChIP-seq and related techniques are in fact functional REs\cite{Fisher2012}. Therefore, individual cREs in the lists compiled by efforts such as the ENCODE and mouseENCODE consortia\cite{ENCODE2012,mouseENCODE2014} have to be directly tested for function and characterized in detail.

The ultimate functional characterization of cREs will involve a combination of loss-of-function assays and direct assays for activity. The former have been until recently technically challenging, but are becoming more commonplace with the advent of large-scale CRISPR/Cas9-mediated mutagenesis techniques\cite{Korkmaz2016,Fulco201}. Nevertheless, most work in the field has been based on testing cREs for regulatory activity using an exogenous plasmid construct combining a cRE, a promoter and a reporter % we need an old-school reference for transfectons. 

Classically, such testing has been done by cloning individual cREs into plasmids (or other vectors) and then assaying the expression of the reporter gene (luciferase activity in cell lines or staining for LacZ activity in embryos\cite{Visel2007,Fisher2012}). Numerous developmental enhancers have been characterized following that approach\cite{Visel2007,Visel2009,May2011}, usually starting from lists of cREs derived from p300 ChIP-seq datasets. However, \hl{XXX explanation of issues with system XXX}. 

High-throughput sequencing has enabled the development of assays that go beyond the testing of individual cREs, one by one; instead very large numbers of sequences are analyzed in parallel, with the readout being based on sequencing DNA tags associated with the cRE or of the cRE itself. These are usually referred to as Massively Parallel Reporter Assays (MPRA)\cite{Inoue2015}, and a number of variations of the principle have been successfully applied to a multitude of biological problems and systems in the last few years\cite{Patwardhan2009,Kinney2010,Kwasnieski2012,Melnikov2012,Patwardhan2012,FIREWACh2014}, including the question of testing cREs for activity within the context of the ENCODE Consortium\cite{Kheradpour2013,Kwasnieski2014,Ernst2016}. However, several issues complicate the interpretations of MPRA experiments.

First, the nature of MPRA designs is such that the elements tested are very short, in the 80-250bp neighborhood. This is significantly shorter than tens of thousands of blocks of conserved noncoding sequence that can be identified in the human genome by comparative genomic analysis (Figure \ref{Fig1}). Thus to what extent complete REs are assayed and what the corresponding false negative rate is have always been an obvious concern regarding MPRAs. 

Second, given that it is very difficult to control the number of constructs going into each individual cell in transfection experiments, and that an MPRA features large numbers of different cREs being tested in the same time, there is significant potential for crosstalk between active and inactive cREs that end up in the same cell, resulting in numerous false positives. 

While the latter concern can be alleviated to an extent through the use of genome-integrated MPRA constructs \cite{Maricque2016,Inoue2016}, the short length of constructs tested remains a significant issue, and one that might be behind the low positive rates reported by MPRAs in the past\cite{Vockley2016}. 

There is therefore a major gap in the field that needs to be filled by testing a large number of individual constructs of large size (500-1000 bp) using a traditional luciferase assay, and using the resulting daya to examine the performance of biochemical signature predictions.

To this end, we tested hundreds of constructs of such length in several mammalian cell line systems (Figures \ref{Fig2} and \ref{Fig3}). \hl{XXXX FINISH XXX}

\hl{XXX SUMMARY OF RESULTS AT END OF INTRO, TO BE WRITTEN XX}

% \cite{Vockley2016}

% \cite{STARRseq2013}

\section*{Results}

\subsection*{Large-scale functional assay testing of cREs}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=14.5cm]{eLifeFig1-CNE-length.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf The length of thousands of conserved noncoding elements in mammalian genomes greatly exceeds the size range of MPRA constructs}. (A) The length distribution of conserved noncoding regions in the human genome. The \texttt{phastCons100way} conservation track for the \texttt{hg20} version of the human genome was downloaded from the UCSC Genome Browser. Blocks of conservation, in which all nucleotides have \texttt{phastCons} scores higher than the indicated minimum (\texttt{phCons}), were identified, and then merged into larger regions if the length of the gaps between them was smaller than the indicated \texttt{maxGap} parameter. The distribution of the lengths of the resulting sets of regions was plotted. This approach captures the properties of enhancer elements observed in the genome, which often consist of multiple blocks of highly conserved sequences separated by gaps of less conserved sequences, resulting in an enhancer element of up to a few hundred base pairs in length or more. (B) Such an example is shown for the \textit{Acta1} gene in mouse.} 
\label{Fig1}
\end{figure*}

\end{multicols}

\begin{FPfigure}
\begin{center}
\includegraphics[width=15.5cm]{eLifeFig2-cartoon.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Outline of experimental design, cRE selection and testing}. \hl{XXXX details XXXX} } 
\label{Fig2}
\end{FPfigure}

\begin{multicols}{2}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=14cm]{eLifeFig3-contruct-length.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Length distribution of functional assays constructs used to test cREs in this study}. (A) Distribution of functional assay construct lengths tested in this study in C2C12 cells. (B) Distribution of functional assay construct lengths tested in this study in G1E cells. (C) Distribution of functional assay construct lengths tested in this study in K562 and HepG2 cells } 
\label{Fig3}
\end{figure*}

\subsection*{The regulatory landscape of muscle differentiation}

\end{multicols}

\clearpage

\includegraphics[width=17.5cm]{eLifeFig4a-C2C12-heatmaps-MyoD.png}

\clearpage

\includegraphics[width=17.5cm]{eLifeFig4b-C2C12-heatmaps-myogenin-only.png}

\clearpage

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{eLifeFig4c-C2C12-heatmaps-MEF2-only.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf \hl{CAPTION}}. \hl{CAPTION GOES HERE} } 
\label{Fig4}
\end{figure*}

\begin{multicols}{2}

\hl{XXXXXXXXXXXXXXXXXXXX}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{eLifeFig5-C2C12-DNAse-H3K27ac.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf \hl{CAPTION} }. \hl{caption.} }
\label{Fig5}
\end{figure*}

\subsection*{Regulatory activity of cREs in muscle cells}

\end{multicols}

\clearpage

\includegraphics[width=18.5cm]{eLifeFig6a-C2C12-funcassay-locus.png}

\clearpage

\includegraphics[width=18.5cm]{eLifeFig6b-C2C12-funcassay-random.png}

\clearpage

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=16.5cm]{eLifeFig6c-C2C12-funcassay-negative-controls.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{Fig6}
\end{figure*}

\begin{multicols}{2}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{eLifeFig7-C2C12-predictivity.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{Fig7}
\end{figure*}

\subsection*{The regulatory landscape of erythroid differentiation}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{eLifeFig8-G1E-TAL1-GATA-heatmaps.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTION GOES HERE} 
\label{Fig8}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{eLifeFig9-G1E-DNAse-H3K27ac.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTION GOES HERE} 
\label{Fig8}
\end{figure*}

\subsection*{Regulatory activity of cREs in erythroid cells}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{eLifeFig10-G1E-funcassay.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{Fig10}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=16.5cm]{eLifeFig11-G1E-predictivity.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTIO GOES HERE} 
\label{Fig11}
\end{figure*}

\subsection*{The chromatin landscape of human immortalized cell lines}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{eLifeFig12-K562_HepG2-DNAse-H3K27ac.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTION GOES HERE} 
\label{Fig12}
\end{figure*}

\subsection*{Regulatory activity of cREs in human immortalized cell lines}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{eLifeFig13-K562_HepG2-funcassay.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTION GOES HERE} 
\label{Fig13}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.5cm]{eLifeFig14-K562-HepG2-A549-plots.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf CAPTION}. (A) CAPTION GOES HERE} 
\label{Fig14}
\end{figure*}

\subsection*{The regulatory landscape of glucocorticoid response in A549 cells}

\end{multicols}
\clearpage

\begin{sidewaysfigure}
\begin{center}
\includegraphics[width=24cm]{eLifeFig15-A549-heatmaps.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf \hl{A549 heat maps} }. (A) CAPTION GOES HERE} 
\label{Fig.5.3}
\end{sidewaysfigure}

\clearpage

\begin{multicols}{2}

% Fig16: STARR-seq
% Fig17: ROC vs STARR-seq

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=14cm]{eLifeFig19-predictivity.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf \hl{NOTE: PARTIAL FIGURE} }. (A) CAPTION GOES HERE} 
\label{Fig19}
\end{figure*}

\section*{Discussion}

Promoter-enhancer specificity \cite{Zabidi2015}

integration in genome \cite{Maricque2016,Inoue2016,FIREWACh2014}

\section*{Methods}

Except where otherwise stated, all analyses were performed using custom-written python scripts. The GENCODE

\subsection*{Cell culture}

\subsubsection*{C2C12 cells}

C2C12 myoblasts %and 10T1/2 fibroblasts 
were maintained and seeded for transfection in 20$\%$ FBS supplemented DMEM medium. Upon reaching $>$80$\%$ confluency, the cells were were differentiated %, or mock differentiated for 10T1/2s 
using 2$\%$ horse serum and 1 $\mu$M insulin in DMEM medium.

\subsubsection*{G1E cells}

\hl{XXXX DETAILS XXXX}

\subsubsection*{K562 cells}

\hl{XXXX DETAILS XXXX}

\subsubsection*{HepG2 cells}

\hl{XXXX DETAILS XXXX}

\subsubsection*{A549 cells}

\hl{XXXX DETAILS XXXX}

\subsection*{Cloning and DNA purification}

\subsection*{Functional assays}

\subsubsection*{C2C12 cells}

Candidate REs as well as a set of negative control regions were either PCR-amplified from female BALB/C purified mouse genomic DNA (Switchgear Genomics) or synthesized de novo (Genscript). The resulting DNA was cloned into a reporter vector 5$^\prime$ of a custom TK promoter (SwitchGear Genomics) driving a high-turnover sequence-optimized luciferase reporter gene. Plasmids were purified using Miniprep kits (Quiagen) and standardized to 30 ng/$\mu$L using fluorometry concentration measurements (Qubit\textsuperscript{\textregistered} dsDNA HS (High Sensitivity) Assay Kit).

For the purpose of testing elements in the myoblast state, undifferentiated C2C12 cells were seeded in 96-well delta surface plates (NUNC) quadruplicates 12 hours before transfection at a concentration of 2500 cells/well. For the purpose of testing elements in the myocyte state, undifferentiated C2C12 cells were seeded at a density of 3500 cells/well. %10T1/2 cells are seeded at 5000 cells per well. 
Transfections were carried out with 50ng of DNA per construct in each replicate using Lipofectamine LTX, after a 5 minute incubation with a 1:16 dilution with the PLUS reagent(Thermo Fisher). Myoblast plates were lysed using a Steady-Glo\textsuperscript{\textregistered} kit, and luminescence was measured on a plate luminometer 24 hour post-transfection. Myocyte plates %and mock differentiated 10T1/2 mesodermal precursor 
had their media exchanged with differentiation 12-16 hours post transfection and measured following the same procedure 24 hours later.

Aside from the plate reading step, the entirety of the trasnfection process was automated and carried out on a Tecan Freedom EVO 200 robot.

\subsubsection*{G1E/K562 cells}

\hl{XXX DETAILS XXX}

\subsubsection*{K562, HepG2 and A549 cells}

\hl{XXX DETAILS XXX}

\subsection*{Functional Assay Data processing}

Luminometer data was ratioed relative to the basal promoter vector (relative assay activity). Active cREs were discriminated from inactive using a Z-score analysis that compared the population of test element technical replicate values to the set of negative control vectors. \hl{XXX this could be stated more explicitly with the actual formulas XX}. 

\subsection*{ChIP-seq experiments}

Chromatin immunoprecipitation in A549 cells was performed as previously described (Reddy et al. 2009) using 2 $\times$ 10$^7$ A549 cells per replicate. Cells were sonicated using a Bioruptor XL (Diagenode) on the high setting until the resulting chromatin was fragmented to a median fragment size of 250 nt as assayed by agarose gel electrophoresis. GR ChIP was performed using 5 $\mu$g of a rabbit polyclonal $\alpha$-GR antibody (Santa Cruz Biotechnology sc-1003), and 200 $\mu$l of magnetic sheep anti-rabbit beads (Life Technologies M-280). H3K27ac ChIP was performed using \hl{XXX Ab source XXX}. After reversal of formaldehyde crosslinks at $65\,^{\circ}\mathrm{C}$ overnight, DNA was purified using MinElute DNA purification columns (QIAGEN). Illumina sequencing libraries were then generated using the Apollo 324 liquid handling platform according to manufacturer's specifications (Wafergen). 

ChIP-seq in C2C12 cells was performed using chromatin from 2 $\times$ 10$^7$ nuclei, which was fragmented using a Misonix probe tip sonicator and subjected to immunoprecipitation using a robotic ChIP pipeline described before. The resulting purified DNA was then converted into sequencing libraries and sequenced on an HiSeq 2500 (Illumina) as described previously\cite{Gasper2016}. The following antibodies were used: $\alpha$-myogenin (\hl{xxx}), $\alpha$-MyoD (\hl{xxx}), $\alpha$-MEF2 (\hl{xxx}), $\alpha$-p300 (\hl{xxx}), $\alpha$-E2A (\hl{xxx}), $\alpha$-H2B (\hl{xxx}), and $\alpha$-H3K27ac (\hl{xxx}). 

In addition, publicly available\cite{} Pbx1 ChIP-seq and Control datasets were downloaded from GEO accession GSE76010. 

For G1E, K562 and HepG2 cells, previously publicly available\cite{ENCODE101,ENCODE2012,mouseENCODE2014} ChIP-seq datasets were downloaded from the ENCODE portal \burl{https://www.encodeproject.org/}. 

\subsection*{DNAse-seq experiments}

In C2C12 cells, DNAse-seq was carried out as follows: \hl{XXXXX DETAILS XXX}

In A549 cells, DNAse-seq was carried out as follows: \hl{XXXXX DETAILS XXX}. 

For G1E, K562 and HepG2 cells, previously publicly available\cite{ENCODE101,ENCODE2012,mouseENCODE2014} DNAse-seq datasets were downloaded from the ENCODE portal \burl{https://www.encodeproject.org/}. 

\subsection*{RNA-seq experiments}

\hl{XXX C2C12 RiboMinus XXX}

\subsection*{STARR-seq experiments}

The STARR-seq experiments previously published by Vockley et al. \cite{Vockley2016} were used in this study. 

\subsection*{Genomic coordinate conversion}

The regions to be tested using functional assays were designed based on the \verb|mm8| and \verb|mm9| verions of the mouse genome and the \verb|hg19| version of the human genomes. Conversion of the original coordinates to \verb|mm10| and \verb|hg20| coordinates was performed using the \verb|liftOver| tool from the UCSC Genome Browser Utilities\cite{UCSCGB}.

\subsection*{Conservation analysis}

Sequence conservation analysis were carried out using the \verb|phastCons60way| and \verb|phastCons100way| conservation tracks, which were downloaded from the UCSC Genome Browser\cite{UCSCGB}. 

\subsection*{ChIP-seq data processing and analysis}

ChIP-seq reads were trimmed down to 36 bp in length and mapped against the \verb|hg20| (for human samples; the male or female version depending on the sex of the cell line the sample originated from) and \verb|mm10| (for mouse samples) using Bowtie \cite{Bowtie} (version 1.0.1) with the following settings: \verb|-v 2| \verb|-k 2| \verb|-m 1| \verb|--best| \verb|--strata|. DNAse-seq reads were processed similarly except that they were trimmed down to 20bp for A549 samples and 36bp for C2C12 cells (due to differences in the experimental protocol used to generate the data).

Peak calling was carried out as follows. For DNAse and H3K27ac datasets, MACS2 \cite{Feng2012} (version 2.1.0) was run on individual replicates and on pseudoreplicates (generated by randomly splitting the pooled set of reads for both replicates into two) with relaxed settings (\verb|--to-large| \verb|-p 1e-1|). For H3K27ac control datasets were subjected to the same treatment (no background/control is available for DNAse data) The top 100,000 peaks from each replicate or pseudoreplicate (ranked by $q$-value) were then used as input into IDR \cite{Li2011}. The number of peaks above a given IDR threshold called as reproducible between true replicates ($N_t$) and between pseudoreplicates  ($N_p$) were recorded. Peak calling was then carried out on the pooled set of reads and the top $max(N_t,N_p)$ peaks were chosen as the final set of reproducible peaks. For point-source \cite{Pepke2009} datasets (transcription factors), peak calling was carried out following the same procedure but using SPP \cite{Kharchenko2008} (version 1.10.1), using the top 300K peaks as input to IDR.

The pooled sets of reads were also used to calculate RPM (reads per million) enrichment values over elements tested in functional assays.

\subsection*{RNA-seq data processing}

ENCODE paired-end human RNA-seq reads were trimmed down to 2 $\times$ 75mers. C2C12 RiboMinus reads were aligned as 1 $\times$ 50mers. Reads were initially aligned against human or mouse ribosomal RNA sequence, and the unmapped reads were then aligned against the \verb|hg20| and \verb|mm10| versions of the human genome using TopHat\cite{TopHat2} (version 2.0.8). 

\subsection*{STARR-seq data processing and analysis}

% Estimation of Regulatory Activity in STARR-Seq Assayed Sites
% BAC STARR-seq output libraries were sequenced on an Illumina MiSeq Instrument using paired-end 25 bp reads. Reads were aligned to the hg19 reference genome sequence for the target BACs using Bowtie (Langmead and Salzberg 2012), paired end reads were converted to corresponding fragments, and the number of reads per fragment was counted for each replicate. The number of reads in each experiment was then normalized to the median of the number of reads per fragment divided by the geometric mean read count of that fragment. To estimate the significance of differences between EtOH and DEX treatment, a Wilcoxon signed-rank test was performed using a sliding 1 bp window across the target BACs. For each test, the normalized read counts for the fragments overlapping the window were compared between DEX and EtOH treatment conditions, and a p value was reported.

% GLM, mean read counts in control and in STARR; stats on differences

\section*{Acknowledgments}

Library generation and high-throughput sequencing for C2C12 ChIP-Seq samples was performed by Igor Antoshechkin at the Millard and Muriel Jacobs Genetics and Genomics Laboratory. The authors would also like to thank Diane Trout and Henry Amrhein for technical assistance with maintaining the computational infrastructure used to carry out this study.

This material is based upon work supported by the National Science Foundation under Grant No. CNS-0521433.

\begin{thebibliography}{100}

\input{references}

\end{thebibliography}

\end{multicols}

\setcounter{table}{0}
\renewcommand{\tablename}{Supplementary Table}
\setcounter{figure}{0}
\renewcommand{\figurename}{Supplementary Figure}

\clearpage

% \section*{Supplementary Methods}

% \section*{Supplementary Figures}

% \begin{figure*}[!ht]
% \begin{center}
% \includegraphics[width=17.5cm]{FigS2-G1E-activity-vs-RPM-conservation.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
% \end{center}
% \captionsetup{singlelinecheck=off,justification=justified}
% \caption{
% {\bf CAPTION}. (A) CAPTIO GOES HERE} 
% \label{FigS2}
% \end{figure*}

% \begin{figure*}[!ht]
% \begin{center}
% \includegraphics[width=17.5cm]{FigS3-G1E-activity-vs-RPM-TF-binding.png}
% \includegraphics[angle=90,origin=c,width=17.5cm]{Fig1.png}
% \includegraphics[width=23.5cm]{Fig1.png}
% \end{center}
% \captionsetup{singlelinecheck=off,justification=justified}
% \caption{
% {\bf CAPTION}. (A) CAPTIO GOES HERE} 
% \label{FigS3}
% \end{figure*}

\end{document}