\documentclass[10pt]{article}
\usepackage[hmargin=1.5cm,top=2cm,bottom=2cm]{geometry}
\usepackage{multicol}
\setlength\columnsep{15pt}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{array}
\usepackage{booktabs}
\usepackage{tabularx}
\usepackage[auth-sc]{authblk}
\usepackage{longtable}
\usepackage{multirow}
\usepackage{hyperref}
\usepackage{enumerate}
\usepackage[labelfont=bf]{caption}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage{mdframed}
\usepackage{graphics}
\usepackage{multirow}
\usepackage{rotating}
\usepackage{array}
\usepackage{capt-of}
\usepackage{lscape}
\usepackage{caption}
\usepackage{breakurl}
\usepackage{todonotes}
\usepackage{hanging}
\usepackage{pagecolor}
\usepackage[final]{pdfpages}
\usepackage[leftFloats,CaptionAfterwards]{fltpage}
\usepackage[numbers,super,sort&compress]{natbib}
\setlength{\bibsep}{0pt plus 0.3ex}
\usepackage{abstract}
\usepackage{enumitem}
\usepackage{soul}
\usepackage{titlesec}
\titleformat{\section}[block]{\large\bfseries\filcenter}{\thesection.}{0.4em}{}
\titleformat{\subsection}[block]{\normalsize\sc\bfseries\filcenter}{\thesubsection.}{0.4em}{}
\titleformat{\subsubsection}[block]{\normalsize\sc\itshape\filright}{\thesubsection.}{0.4em}{}
\setcounter{secnumdepth}{5}

\usepackage[hang]{footmisc}
\setlength\footnotemargin{0em}

\setlength{\skip\footins}{0.75cm}

\makeatletter
\renewcommand\footnoterule{%
  \kern-3\p@
  \hrule\@width \textwidth height 1.5pt
  \kern2.6\p@}
\makeatother

\makeatletter
\def\@biblabel#1{\@ifnotempty{#1}{#1.}}
\makeatother

\newcommand{\filllastline}[1]{
\setlength\leftskip{0pt}
\setlength\rightskip{0pt}
\setlength\parfillskip{0pt}
#1}

\newenvironment{Figure}
{\par\medskip\noindent\minipage{\linewidth}}
{\endminipage\par\medskip}

\title{\bf Long-range single-molecule mapping of chromatin accessibility in eukaryotes}
\renewcommand\Authfont{\scshape\normalsize}
\author[1,*]{Zohar Shipony}
\author[1,*]{Georgi K. Marinov}
\author[5]{Matthew P. Swaffer}
\author[1]{Nicholas A. Sinnott-Armstrong}
\author[5]{Jan M. Skotheim}
\author[1,2]{Anshul Kundaje}
\author[1,3,4,$\#$]{William J. Greenleaf}
\renewcommand\Affilfont{\itshape\small}
\affil[1]{Department of Genetics, Stanford University, Stanford, CA 94305, USA}
\affil[2]{Department of Computer Science, Stanford University, Stanford, CA 94305, USA}
\affil[3]{Department of Applied Physics, Stanford University, Stanford, CA 94305, USA}
\affil[4]{Chan Zuckerberg Biohub, San Francisco, California, USA}
\affil[5]{Department of Biology, Stanford University, Stanford, CA 94305, USA}
\affil[*]{These authors contributed equally to this work}
\affil[$\#$]{Corresponding author}
\date{}

\begin{document}
\maketitle

\begin{abstract}

\noindent {\normalsize \textbf{Mapping open chromatin regions has emerged as a widely used tool for identifying active regulatory elements in eukaryotes. However, existing approaches, limited by reliance on DNA fragmentation and short-read sequencing, cannot provide information about large-scale chromatin states or reveal coordination between the states of distal regulatory elements. We have developed a method for profiling the accessibility of individual chromatin fibers (SMAC-seq, or \underline{\textbf{S}}ingle-\underline{\textbf{M}}olecule long-read \underline{\textbf{A}}ccessible \underline{\textbf{C}}hromatin mapping \underline{\textbf{seq}}uencing assay), enabling the simultaneous, high-resolution, single-molecule assessment of chromatin states at multi-kilobase length scales. Our strategy is based on combining the preferential methylation of open chromatin regions by DNA methyltransferases with low sequence specificity, in particular using EcoGII, an N$^6$-methyladenosine (m$^6$A) methyltransferase, and the ability of nanopore sequencing to directly read DNA modifications. We demonstrate that aggregate SMAC-seq signals match bulk-level accessibility measurements, observe single-molecule nucleosome and transcription factor protection footprints, and quantify the correlation between chromatin states of distal genomic elements.}
}
\centerline{}
% \centerline{}
\end{abstract}

\renewcommand{\footnotesize}{\fontsize{10pt}{12pt}\selectfont}

\begin{multicols}{2}

% \filllastline{

In eukaryotes, open chromatin regions are associated with regulatory elements (REs), such as enhancers, promoters, and insulators. This property is highly useful for identifying candidate REs (cREs) and for understanding the functional organization of genomes \cite{Wu1980,Keen1981,McGhee1981}, \cite{Dorschner2004,Sabo2006} \cite{Boyle2008,Hesselberth2009}, and methods such as DNAse hypersensitivity-based mapping and MNase-seq are widely used to map putative regulatory elements or nucleosome positioning \cite{Schones2008}. More recently, the Tn5 transposase was adapted as a probe for chromatin accessibility\cite{Buenrostro2013}. However, these short read methods provide little insight into the long-range physical organization of individual chromatin fibers, as they do not preserve linkage between the state of distal segments along a single molecule.

We developed SMAC-seq, a single-molecule method that directly assays both open chromatin regions and nucleosome positioning within a single chromatin fiber at multikilobase scales. We use SMAC-seq to study chromatin architecture and co-accessibility states in the yeast \textit{Saccharomyces cerevisiae}. We assess the degree of coordination between positions of nearby nucleosome particles, enumerate mutually exclusive regulatory states along individual loci, and observe coordinated changes in nucleosome positioning and chromatin accessibility upon transcriptional activation. SMAC-seq allows for footprinting of transcription factor (TF) occupancy, and provides strand-specific information about the exposure of DNA occupied by nucleosomes. We expect future applications of, improvements on, and extensions of the SMAC-seq approach to enable novel insights into the dynamics of chromatin states in a wide variety of experimental systems.

\section*{Results}

\subsection*{SMAC-seq maps chromatin accessibility and nucleosome positioning at the multikilobase scale}

\end{multicols}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig1V6.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf The SMAC-seq assay for profiling chromatin accessibility and nucleosome positioning at the multikilobase scale}. 
(a) Outline of the SMAC-seq assay. Intact chromatin is treated with m$^6$A and CpG and GpC 5mC methyltransferases, which preferentially methylate DNA bases in open chromatin regions. HMW DNA is then isolated, subjected to nanopore sequencing and methylated bases are used to reconstruct the open chromatin state within individual molecules. 
(b-h) SMAC-seq faithfully captures chromatin accessibility around promoters and positioned nucleosomes in \textit{S. cerevisiae}; 
(b) MNAse-seq and dSMF profiles around chemically mapped positioned nucleosome dyads; 
(c) DNAse-seq and dSMF profiles around the top 20\% highly expressed genes in \textit{S. cerevisiae};
(d) DNAse-seq and dSMF profiles around the bottom 20\% expressed genes in \textit{S. cerevisiae};
(e) Average SMAC-seq profile around chemically mapped positioned nucleosomes dyads (shown is the  ``diamide 0 min rep2'' sample); 
(f) Average SMAC-seq profile around the top 20\% highly expressed genes in \textit{S. cerevisiae};
(g) Average SMAC-seq profile around the bottom 20\% expressed genes in \textit{S. cerevisiae};
(h) SMAC-seq correlates closely with both DNAse-seq and nucleosome occupancy profiling at the level of individual loci, and provides a combined readout of accessibility and nucleosome positioning. Shown is aggregate SMAC-seq signal along the genome (aggregated over 50-bp windows sliding every 5 bp; see Methods for details) together with DNAse-seq, nucleosome chemical mapping data, and transcriptional activity (measured by PRO-seq and PRO-cap). Large aggregate SMAC-seq signal enrichments match closely with DNAse accessibility peaks, while smaller aggregate SMAC-seq peaks are inversely correlated with positioned nucleosomes; 
(i) SMAC-seq profiles chromatin accessibility in repetitive regions of the genomes that are ``invisible'' to short reads. Shown is the telomeric region of chrXVI 
} 
\label{Fig1}
\end{figure*}

\begin{multicols}{2}

% \noindent 

SMAC-seq is built on the conceptual foundations of NOMe-seq/dSMF\cite{Kelly2012,Nabilsi2014, Krebs2017}. These methods rely on preferential modification of accessible DNA with M.CviPI and/or M.SssI  (GpC/CpG-specific 5mC methyltransferases), followed by bisulfite conversion and Illumina-based sequencing readout (both enzymes can be used in the absence of endogenous 5mC methylation).
We use the m$^6$A methyltransferase EcoGII\cite{Murray2018} as an alternative/addition to CpG/GpC, and use nanopore sequencing to generate single-molecule readouts of accessibility states over many kilobases (Figure 1a). Nanopore sequencing allows direct detection of these modifications, \cite{Simpson2017,Rand2017} enabling the generation of methylation maps for individual DNA molecules, which can then be interpreted in terms of chromatin accessibility.

The addition of m$^6$A signal associated with accessible chromatin substantially improves both the resolution and applicability of SMAC-seq. Many genomes are endogenously methylated at 5mC positions\cite{Feng2010,Zemach2010}, usually in CpG contexts, but not always\cite{Lister2008}, confounding CpG/GpC-based accessibility measurements. More importantly, CpG/GpC dinucleotides are rare. The average resolution achieved by combining the two methyltransferases is $>$10 bp in \textit{Drosophila} and $\sim$15 bp in yeast. It is $\sim$25bp for GpC alone in mammals, and these are averages; in practice many individual regions either completely lack or contain too few informative positions. Using m$^6$A increases SMAC-seq's resolution down to a theoretical limit of $\sim$3 bp in all model organisms, and ensures proper coverage over all individual loci (Supplementary Figures 1--3 and 16--24).

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=17.75cm]{Fig2V8-part1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf SMAC-seq provides a single-molecule linked-read view of the chromatin landscape}. 
(a) Unfiltered nanopore reads fully spanning the 4-kilobase neighborhood of the centromere of \textit{S. cerevisiae} chrIII (``aggregate'' signal ``Sample 1''). 
(b) Unfiltered nanopore reads fully spanning a 6.6-kilobase neighborhood encompassing several genes on chrIV (``aggregate'' signal from ``Sample 1''). 
In both cases, accessibility is shown at 10-bp resolution (see Methods section for details) for the single-molecule display, and aggregated over sliding (every 5 bases) 50-bp windows for the average SMAC-seq track.
% 500 reads sample, BI
}
\label{Fig2part1}
\end{figure*}

We initially developed the method in \textit{S. cerevisiae} as it has no endogenous DNA methylation and has a small genome ($\sim$12 Mbp), enabling very high sequencing coverage. To verify the specificity and efficiency of enzymatic treatments, we carried out both dSMF experiments on chromatin and M.CviPI + M.SssI + EcoGII reactions on naked DNA (gDNA), followed by bisulfite sequencing. We observe $\geq$95\% CpG/GpC methylation for gDNA, $\leq$10\% for chromatin, and $\sim$0\% on untreated gDNA (Supplementary Figure 6). Comparing  dSMF to DNase-seq and MNase-seq profiles around transcription start sites (TSSs) and positioned nucleosomes\cite{Brogaard2012} revealed the expected nucleosome depletion/occupancy patterns (Figure 1b-d).

EcoGII's methylation efficiency is more difficult to estimate as fully methylated templates are known to be difficult to sequence on the Oxford Nanopore platform. Using yeast gDNA or $\lambda$ DNA treated with a high dose of EcoGII (Supplementary Table 1), the limited number of observed reads exhibited $\sim$50\% methylation levels (Supplementary Figure 7 and 8). We hypothesize these rates are underestimates, as biochemical reports suggest >50\% methylation of gDNA after 5 minutes, increasing to $\geq$85\% after an hour \cite{Murray2018}.

We next applied SMAC-seq to unsynchronized \textit{S. cerevisiae} cells. We obtained reads with a median length $\sim$1.5 kbp from this initial experiment, allowing the capture of multiple promoter regions for much of the yeast genome (Supplementary Figures 4-5). We applied the Tombo\cite{Stoiber2017} algorithm (running on top of the Minimap aligner\cite{Li2016}) for ``resquiggling'' of raw nanopore signal and general methylated base calling. We also analyzed our initial dataset with Nanopolish\cite{Simpson2017}, an alternative algorithm for identifying 5mC events in CpG/GpC context.

Unlike Illumina-based bisulfite sequencing, nanopore-based measurement of DNA methylation provides methylation probabilities. While per-base methylation probabilities are skewed towards 0 or 1, a substantial fraction lie in between those extremes (Supplementary Figure 9). We examined multiple approaches for binarizing methylation calls within single molecules to identify the optimal strategy in terms of signal-to-noise ratio (Supplementary Figures 10--11). We compared average SMAC-seq profiles to dSMF, MNase-seq, and DNase-seq, as well as ChIP-seq for RNA Polymerase (Pol2) and transcription initiation factors around known chromatin features (Figure 1e-g, Supplementary Figures 25 and 28). SMAC-seq faithfully reproduces nucleosomal positioning throughout the genome and nucleosome depletion around promoters. We also observe positive correlation between average SMAC-seq methylation levels and DNase/ATAC-seq coverage over promoter regions (Supplementary Figure 27). Strikingly, SMAC-seq has a larger observed dynamic range than dSMF data (possibly due to higher long-read mapping efficiency). Based on dSMF and untreated gDNA data, we estimate the false positive rate of methylation base calling to be $\sim$20\% for Tombo and 10-15\% for Nanopolish (Supplementary Figures 7 and 15). We also examined potential sequence biases inherent to the combination of methylation enzymes and base calling algorithm. We find only modest differences in methylation levels for different $k$-mers ($\leq$2-fold for $k=6$; Supplementary Figures 13-14). 

In practice, the biologically relevant scale of chromatin accessibility is larger than an individual base. We thus reasoned that sharing methylation information between adjacent bases should improve the reliability of accessibility measurements, and developed a Bayesian procedure to aggregate methylation probabilities and derive single-molecule accessibility calls over windows of arbitrary size (thereafter referred to as ``aggregate'' signal). 

We observed a relatively small subpopulation of reads highly methylated over large segments (Supplementary Figure 12 and Figure 2a), which we interpret as originating from naked DNA molecules likely from dead cells. As such reads can confound many analyses, we filter these out (Supplementary Figure 32). However, at certain loci chromatin is indeed largely nucleosome-free in vivo; for such unique loci, we analyze all reads.

We then compared average SMAC-seq profiles against positioned nucleosomes, DNase-seq, and transcriptional activity maps at the level of individual genomic loci (Figure 1h). Qualitatively, we observe that large SMAC-seq peaks match very closely with DNase-seq peaks, while smaller SMAC-seq ``bumps'' inversely correlate with positioned nucleosomes, consistent with labeling of linker DNA. Thus SMAC-seq simultaneously identifies both open chromatin regions and positioned nucleosomes.

The long nanopore reads allow SMAC-seq to map accessibility for the whole yeast genome (Supplementary Figure 31). For example, SMAC-seq maps chromatin and nucleosomes in the repetitive telomere of chrXVI (Figure 1i), which contains several active promoters and numerous well-positioned nucleosomes. SMAC-seq also revealed open chromatin peaks around the promoters of multiple transposable elements (Supplementary Figure 30).

\subsection*{SMAC-seq provides single-molecule accessibility profiles on individual chromatin fibers}

\begin{figure*}
\begin{center}
\begin{minipage}[c]{0.80\linewidth}
\includegraphics[width=14.5cm]{Fig2V8-part2.png}
\end{minipage}\hfill
\begin{minipage}[c]{0.20\linewidth}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf SMAC-seq's single-molecule readout provides insights into the distribution and relationship between mutually exclusive chromatin yeast rDNA states.} 
(a) SMAC-seq reveals the distribution of alternative chromatin states of rDNA arrays. Shown are all reads covering the \textit{RDN37-1} array in the \textit{RDN1} locus in the ``diamide 30 min rep1'' experiment (unfiltered reads, ``aggregate'' signal). See Supplementary Figures 38--41 for additional details. ChIP-seq and ChIP-exo tracks were generated by including and normalizing all multimappers rather than the usual unique-only policy (See the Methods section for more details). 
(b) Normalized mutual information profiles for the \textit{RDN37-1} array show anti-correlation between the accessibility peaks immediately upstream of the \textit{35S} TSS and the nucleosome-free state over the \textit{35S} transcriptional unit.
(c) High-resolution SMAC-seq profiles reveal regulatory protein footprints in the immediate vicinity of the 35S TSS and the Reb1 binding site in the rDNA NTS region (shown are 3000 randomly sampled reads using 10-bp aggregate SMAC-seq signal at 1-bp resolution).
\label{Fig2part2}}
\end{minipage}
\end{center}
\end{figure*}

To demonstrate SMAC-seq's ability to map open chromatin within individual long molecules we investigated all reads spanning the 4-kb neighborhood around the chrIII centromere (Figure 2a). Yeast centromeres are specified by precisely defined sequence elements and are occupied by a single nucleosome containing the H3 histone variant Cse4, thought to be nearly perfectly positioned\cite{Cole2011,Henikoff2014}. We indeed observe strong nucleosomal positioning using SMAC-seq, with nearly all individual reads exhibiting the expected nucleosomal pattern. We also observe hints of substructure in the form of accessibility inside the protected centromeric region and potential protection footprints in its immediate vicinity. We find similarly strong positioning for many other centromeric nucleosomes (Supplementary Figures 33-36). We also examined a $\sim$6.6-kb span of chrIX containing five genes and three open chromatin regions, one of them fairly large and diffuse. In contrast to the more localized accessibility observed elsewhere, this region exhibits considerable accessibility heterogeneity suggesting a complex protein occupancy landscape. 

We next asked if SMAC-seq could reveal binary chromatin accessibility states by investigating ribosomal DNA (rDNA) loci. In yeast, rDNA is organized into multicopy ($\sim$150) arrays, each $\sim$9.1 kb unit of which contains a copy of the 35S precursor pre-rRNA, transcribed by Pol I, a 5S RNA, transcribed by Pol III, and a replication origin ARS element, located in non-transcribed (NTS) regions of the array. The number of units can vary between cells, and the \verb|sacCer3| genome assembly only contains a single locus with two array copies. The rDNA chromatin structure adopts two distinct conformations\cite{Conconi1989,French2003,Goetze2010} -- an inactive nucleosomal state and a state largely devoid of nucleosomes due to extremely high transcription activity \cite{French2003,Merz2008,Panday2016}. The two states are estimated to exist in roughly equal proportions in normally growing cells\cite{Jones2007}. However, other studies have alternatively suggested that nucleosomes are present over actively transcribed rDNA arrays\cite{Jones2007}. Of note, rDNA indeed appears to be extremely accessible in short-read assays -- around half of reads in a typical yeast ATAC-seq dataset originate from rDNA arrays (Supplementary Figure 37). 
Single-molecule SMAC-seq maps reveals a striking picture of the two alternative, mutually exclusive rDNA states (Figure 3a). About a quarter of full-length molecules exhibit near-full accessibility over the 35S transcript, but not in the NTS; the rest show a typical nucleosomal state. A broadly similar picture is observed in all samples (Supplementary Figures 38--41). While it is possible that the two states are differentially represented due to biases against fully methylated long reads, we observe the fully accessible fraction is present in approximately 50\% of shorter windows around the 35S promoter (Figure 3c). We also observe a region of localized accessibilty just upstream of the 35S transcriptional unit present only in the nucleosomal subpopulation, suggesting the possibility of a regulatory switch at this location. Finally, we also observe at least two previously unreported regions inside 35S exhibiting strong accessibility in the nucleosome-protected fraction (Figure 3a). 

To quantify (anti-)correlation between chromatin states, we developed a modified normalized mutual information (NMI) metric for assessing the degree of accessibility correlation between genomic regions. NMI analysis of rDNA confirmed the inverse correlation between the active 35S state and accessibility of this upstream element (Figure 3b). 

What factors might be driving this observed chromatin state switch? Silencing of yeast rDNA is thought to be mediated by the Sir2-containing RENT complex \cite{Huang2003}, and a NTS1 Reb1 binding site has been suggested to recruit corepressors \cite{Jones2007}. We took a higher-resolution view of NTS1 by integrating SMAC-seq, Reb1 ChIP-exo data and transcription factor motif maps. We find a clear pattern of protection around the Reb1 motif, concordant with ChIP-exo (Figure 3c), and we also observe patterns consistent with footprinting for several other TFs. However, the anti-correlated accessibility profile seems to not be exclusively associated with Reb1 binding but rather with the region closer to the 35S TSS. Thus it appears that other proteins may be responsible for establishing this state.

\subsection*{SMAC-seq provides a high-resolution strand-specific view of protein occupancy on DNA}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig3-V4.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf SMAC-seq provides a high-resolution strand-specific view of genomic occupancy by DNA-binding proteins and complexes}. 
(a-b) SMAC-seq allows for footprinting of transcription factor binding events. Shown is aggregate genome-wide SMAC-seq signal around occupied (as measured by ChIP-exo) Reb1 (a), and Rap1 (b) sequence recognition motifs. 
(c) SMAC-seq profiles around positioned nucleosome dyads reveal increased accessibility in the dyad and increased protection at the points of contact with the nucleosome (see Supplementary Figure 28 for additional details.)
(d) SMAC-seq provides a strand-specific view of nucleosome occupancy and reveals differential accessibility between the two DNA strands depending on their position on the nucleosomal particle.
(e-f) Coordination between the positions of individual nucleosomes at the level of single chromatin fibers. 
(e) Shown is the average normalized mutual information between each strongly or poorly positioned nucleosome in the yeast genome and its immediate genomic neighborhood (measured for windows of 10 bp length tiling at every genomic position centered on the nucleosome dyad).
(f) Shown is the average normalized mutual information between each $+1$ nucleosome and its immediate genomic neighborhood in highly expressed and in mostly silent genes (measured for windows of 10-bp length tiling at every genomic position centered on the $+1$ nucleosome dyad).
}
\label{Fig3}
\end{figure*}

We next asked if SMAC-seq can generally identify TF footprints (Supplementary Figure 42). Averaging genome-wide SMAC-seq profiles over occupied motifs revealed strong protection footprints for several factors, such as Reb1, Rap1 and ORC1 (Figure 4b-c, Supplementary Figure 43). We observed high concordance between footprint profiles observed using either DNase-seq, ATAC-seq, and SMAC-seq with footprints identified previously by high-resolution DNAse-seq \cite{Hesselberth2009} in aggregate and at some individual sites (Supplementary Figure 44--51). However, we did not observe strong footprinting for all TFs (e.g. Abf1 and Cbf1; Supplementary Figure 43) even though some do exhibit DNase-seq footprints \cite{Hesselberth2009}, perhaps because different enzymes differ in their ability to access DNA in the context of protein occupancy.

To further explore SMAC-seq's resolution limits, we studied methylation patterns around positioned nucleosomes in more detail (Figure 4e-f, Supplementary Figure 28). We observe marked increase in accessibility signal at the dyad point, in contrast to the points of contact with DNA two helical turns away. The same pattern was observed for all nucleosomes irrespective of positioning strength. We did not observe similar patterns in deep DNAse-seq data (Supplementary Figure 28e). We next quantified strand-specific DNA accessibility within nucleosomes, and observe a strand-asymmetric DNA accessibility patterns around the nucleosome particle (Figure 4f), especially within the dyad and at the points two helical turns away. The magnitude of these differences is similar to that observed between nucleosomes and flanking linker regions. This heterogeneity in methylation potential within the nucleosome may inform the manner by which TFs might interact with nucleosome-associated DNA in vivo\cite{Zhu2018}. We also note that these patterns are most clearly observed using m$^6$A (Supplementary Figure 29).

\subsection*{SMAC-seq reveals chromatin co-accessibility patterns}

We next examined co-accessibility patterns in the yeast genome by assessing nucleosome positioning correlations. Average NMI profiles centered on positioned nucleosomes reveal detectable correlation between nucleosome positions up to three to four nucleosomes away (Figure 4g; Supplementary Figure 52), with strongly positioned nucleosomes exhibiting stronger overall correlation. These observations are consistent with nucleosomes imposing restrictions on one another, resulting in short-range correlation between protection footprints that dephases over longer ranges. We next measured co-accessibility in the vicinity of promoters (Figure 4h). Active yeast TSSs are characterized by an upstream nucleosome-depleted/free region (NFR) and a well-positioned $+1$ nucleosome. NMI profiles centered on the latter show significant differences between expressed and inactive genes. While correlation decays downstream of the TSS similarly for both groups, active genes exhibit an inverse correlation pattern upstream of the TSS.

Active yeast genes often exist in a looped conformation, with promoter and termination regions in physical proximity, an arrangement thought to help enforce transcriptional directionality\cite{OSullivan2004,Tan2012}. We wondered if accessibility would be correlated between the two 	gene ends. SMAC-seq reveals low levels of correlation between the NFR and 3$^\prime$ gene ends, and stronger correlation between positioned nucleosomes in these locations (Supplementary Figure 53). The correlation between the NFR and 3$^\prime$ ends increases for active genes and decreases for silent genes, suggesting that transcriptional activity and/or looping may help more strongly position nucleosomes at the two gene ends. 

We next assessed coordinated accessibility between yeast TSSs. To this end, we devised an explicit test of coordinated coaccessibility based on splitting reads into separate pieces, randomly reassembling them, then deriving an empirical coaccessibility distribution. We identified 1,115 TSS pairs as significantly correlated (out of 19,578 pairs covered with $\geq$100 reads; Supplementary Figure 54). Of these, 560 were located $\geq$1kb from each other (for example see Supplementary Figure 55). One possible mechanism for this correlated accessibility signal is increased frequency of physical association in 3D space. Analysis of Micro-C data\cite{Hsieh2015} shows that significantly coaccessible promoters interact more frequently than non-coaccessible ones at a similar distance (Supplementary Figure 56).

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig6V4.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Coordinated changes in chromatin accessibility and nucleosomal occupancy during the yeast stress response}. 
(a) Experimental outline. Yeast cells were treated with diamide, then SMAC-seq and other functional genomic assays where carried out at 15- or 30-minute intervals. 
(b) Sites occupied by the HSF1 transcription factor upon its activation by the stress response pathway exhibit strong footprints in SMAC-seq data.
(c) Changes in the expression of the \textit{TMA10} gene upon diamide treatment
(d) Changes in RNA Polymerase and HSF1 occupancy (measured by ChIP-seq), and of chromatin accessibility at the single molecule level in the vicinity of the \textit{TMA10} gene during the diamide time course.
(e) Changes in RNA Polymerase and HSF1 occupancy (measured by ChIP-seq), and of chromatin accessibility at the single molecule level in the vicinity of the \textit{HSP26} gene during the diamide time course.
(f) Decrease in the fraction of transcribed rDNA arrays as a result of cellular response to diamide treatment.
}
\label{Fig6}
\end{figure*}

\subsection*{SMAC-seq charts coordinated accessibility changes during the yeast stress response}

Finally, we carried out SMAC-seq during a time course of diamide treatment, to monitor chromatin states during a dynamic response. Diamide oxidizes thiols in proteins, leading to activation of the stress response pathway and changes in the expression of hundreds of genes\cite{Weiner2015}. Yeast stress response is largely mediated by the Hsf1 and Msn2/4 TFs \cite{Morano2012}.

We performed SMAC-seq at 0, 30 and 60 minutes after diamide treatment, as well as RNA-seq, ATAC-seq, and ChIP-seq for Pol2, the elongating Pol2pS2 version, and HSF1 (RNA and ATAC data were also collected at 15 and 45 minutes; Figure 5a). We observe several hundred genes exhibiting strong expression changes (Supplementary Figure 57), and strong Hsf1 occupancy induction at hundreds of sites (Supplementary Figure 58). SMAC-seq at 30 minutes shows strong footprinting over Hsf1 motifs within induced Hsf1 binding sites.

We illustrate the dynamic accessibility patterns we observe upon diamide treatment using the \textit{TMA10} and \textit{HSP26} genes in Figure 5d-e, and multiple others in Supplementary Figures 59--66. \textit{TMA10} and \textit{HSP26} are strongly upregulated at 15 minutes; \textit{TMA10} expression subsequently declines somewhat and stabilizes (Figure 5c) while \textit{HSP26} only declines at 60 minutes. SMAC-seq reveals a relatively modest level of upstream accessibility before diamide treatment. However, at 30 minutes and upon Hsf1 binding, dramatic changes are evident. Nearby nucleosomes are evicted in many cells, and nucleosome depletion increases within gene bodies, where RNA Pol2 ChIP-seq shows highly active transcription. At 60 minutes, this response dampens for \textit{TMA10}, with the fraction of accessible reads decreasing; the effect is less pronounced for \textit{HSP26} whose expression remains relatively higher. NMI co-accessibility maps (Supplementary Figures 59-68) frequently show loss of correlation between positioned nucleosomes within and upstream of activated gene bodies as a result of diamide response, consistent with increased nucleosome movement due to the activity of polymerases and chromatin remodelers. Aggregate SMAC-seq observations were largely corroborated by matched ATAC-seq on the same samples (Supplementary Figures 69--70). We observed a decrease in the transcribed rDNA array fraction (Figure 5f), consistent with decreased rDNA transcription upon activation of the stress response program. 

\section*{Discussion}

SMAC-seq is a novel single-molecule method for profiling chromatin accessibility within individual chromatin fibers at a multikilobase resolution using nanopore sequencing. SMAC-seq generates accessibility signals similar to widely used short-read methods while enabling the simultaneous profiling of nucleosome positioning and accessible chromatin on a truly genome-wide scale, the measurement of the underlying distribution of accessibility states, and the identification of loci exhibiting significant co-accessibility. 

Extending SMAC-seq to larger genomes will require significantly increased sequencing throughput, or selective enrichment of individual loci. Fortunately, nanopore throughput is increasing rapidly, while selective enrichment methods are also becoming available\cite{Gabrieli2018}. Increases in read length will also be useful, especially for assaying co-accessibilty of distal regulatory elements, which can often be tens of kilobases apart in mammalian genomes.

Base calling is another area of future improvement. On hurdle for the creation of more accurate base callers is the lack of ground truth controls for training base calling algorithms (e.g. pools of DNA templates with individual modifications in well-defined yet highly diverse sequence contexts). Alternatively, the use of tags bulkier than a methyl group\cite{TOPSeq} may provide much stronger modulation of the current passing through the nanopore, enabling more reliable signal identification.We also anticipate a diversity of DNA modifying enzymes available to carry out SMAC-seq variations. 

Endogenous methylation in mammalian genomes also represents potentially confounding signal. To evaluate the sale of this concern, we generated low-coverage SMAC-seq data for human GM12878 cells using only EcoGII, and examined aggregate ``m$^6$A-SMAC'' profiles around CTCF sites, open chromatin regions and TSSs. We recovered the expected features of chromatin accessibility (strong nucleosome positioning around CTCF\cite{Fu2008}, accessibility peaks around all three features), and observed no significant difference between SMAC-seq profiles generated by filtering out A positions nearby CpGs, demonstrating that interference from endogenous methylation is likely modest(Supplementary Figures 71--73).

However, there are also species where m$^6$A occurs endogenously and is strongly correlated with chromatin accessibility and nucleosome positioning\cite{Fu2015,Wang2017,Luo2018}. Modifications such as 4mC \cite{Timinskas1995}, cytidine deamination\cite{Salter2016}, or 5-hydroxymethyluracil\cite{Kawasaki2017} are among the potential future alternatives in such cases.
Finally, we believe that the integration of SMAC-seq with other measurements of the physical genome and the epigenome into single-molecule multiomic assays represents a potentially fruitful direction. We envision the possibility of simultaneous single-molecule, multikilobase-scale measurements of accessibility, nucleosomal positioning, endogenous DNA methylation, protein occupancy, chromatin interactions, and/or DNA replication. In principle, similar approaches may also be applicable to individual RNA molecules. We expect long-read single-molecule approaches to provide an important new class of tools for the study of the functional and physical organization of genomes.

\section*{Materials and Methods}

Except for when explicitly stated otherwise, all analyses were carried out using custom-written Python or R scripts. 

\subsection*{Cell lines and cell culture}

The BY4741 \textit{S. cerevisiae} strain (a kind gift from Ji-Ping Wang and Xiaozhong Wang) was used for all experiments except for Hsf1 ChIP-seq experiments where MS143 (H4S47C\_Hsf1-V5::HphMX6, this study) was used. MS143 was generated by PCR-based C-terminal tagging of Hsf1 with the V5 epitope. Hsf1-V5 tagging was confirmed by colony PCR and western blotting. For all experiments, except the initial one (``Sample 1''), cells were grown in YPD media (30$\,^{\circ}\mathrm{C}$) to OD$\sim$0.8 before collection. 

\subsection*{Yeast SMAC-seq experiments}

\subsubsection*{Enzymatic treatment of chromatin}

We developed and optimized SMAC-seq using the equivalent of $1 \times 10^6$ human cells, which in the case of \textit{S. cerevisiae} translates in to $2.5 \times 10^8$ (the size of the haploid human genome is $\sim$$3 \times 10^9$bp while that of \textit{S. cerevisiae} is $1.2 \times 10^6$bp). As yeast cells have a cell wall, we adapted the spheroplasting protocol previously used for carrying out ATAC-seq in yeast cells\cite{Schep2015} for our SMAC-seq experiments.

Yeast cells in log phase (OD$_{660} \leq 1.0$) were first centrifuged at 13,000 rpm for 1 minute, then washed with 100 $\mu$L Sorbitol Buffer(1.4 M Sorbitol, 40 mM HEPES-KOH pH 7.5, 0.5 mM MgCl$_2$), and centrifuged again at 13,000 rpm for 1 minute. Cells were then spheroplasted by resuspending in 200 $\mu$L Sorbitol Buffer with DTT added at a final concentration of 10 mM and 0.5 mg/mL 100T Zymolase, followed by incubating for 5 minutes at $30\,^{\circ}\mathrm{C}$ at 300 rpm in a Thermomixer. The pellet was centrifuged for 2 minutes at 5,000 rpm, washed in 100 $\mu$L Sorbitol Buffer, and centrifuged again at 5,000 rpm for 2 minutes.

Cells were then resuspended in 100 $\mu$L ice-cold Nuclei Lysis Buffer (10 mM Tris pH 7.4, 10 mM NaCl, 3 mM MgCl$_2$, 0.1 mM EDTA, 0.5\% NP-40) and incubated on ice for 10 minutes. Nuclei were then centrifuged at 5000 rpm for 5 min at $4\,^{\circ}\mathrm{C}$, resuspended in 100 $\mu$L cold Nuclei Wash Buffer (10 mM Tris pH 7.4, 10 mM NaCl, 3 mM MgCl$_2$, 0.1 mM EDTA), and centrifuged again at 5,000 rpm for 5 min at $4\,^{\circ}\mathrm{C}$. Finally, nuclei were resuspended in 100 $\mu$L M.CviPI Reaction Buffer (50 mM Tris-HCl pH 8.5, 50 mM NaCl, 10 mM DTT). 

Nuclei were then first treated with M.CviPI + EcoGII by adding 200 U of M.CviPI (NEB) and 200 units of EcoGII (NEB), SAM at 0.6 mM and sucrose at 300 mM, and incubating at $30\,^{\circ}\mathrm{C}$ for 7.5 min. After this incubation, 128 pmol SAM and another 100 U of enzymes were added, and a further incubation at $30\,^{\circ}\mathrm{C}$ for 7.5 min was carried out. Immediately after, M.SssI treatment followed, by adding 60 U of M.SssI (NEB), 128 pmol SAM, MgCl$_2$ at 10 mM and incubation at $30\,^{\circ}\mathrm{C}$ for 7.5 min. 

The reaction was stopped by adding an equal volume of Stop Buffer (20 mM Tris-HCl pH 8.5, 600 mM NaCl, 1\% SDS, 10 mM EDTA). 

\subsubsection*{High-molecular weight DNA isolation}

HMW DNA was isolated using the MagAttract HMW DNA Kit (Qiagen; cat $\#$ 67563) following the manufacturer's instructions.

\subsubsection*{Enzymatic treatment of naked DNA}

Naked DNA was treated under exactly the same conditions as chromatin except that the reaction volume and enzyme amounts were reduced in half. HMW DNA was purified as described above

\subsection*{GM12878 cell culture}

The GM12878 human lymphoblastoid cell lines were grown in media containing RPMI1640-GlutaMAX (Life Techno¬logies, Carlsbad, CA) supplemented with 10\% fetal bovine serum.

\subsection*{GM12878 SMAC-seq experiments}

Briefly 1$\times 10^6$ human GM12878 cells were washed with 1$\times$ PBS then resuspended in 200 $\mu$L ice-cold Nuclei Lysis Buffer (10 mM Tris pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1 mM EDTA, 0.5\% NP-40) and incubated on ice for 10 minutes. Nuclei were then centrifuged at 500 $g$ for 5 min at $4\,^{\circ}\mathrm{C}$, resuspended in 200 $\mu$L cold Nuclei Wash Buffer (10 mM Tris pH 7.4, 10 mM NaCl, 3 mM MgCl2, 0.1 mM EDTA), and centrifuged again at 500 $g$ for 5 min at $4\,^{\circ}\mathrm{C}$. Finally, nuclei were resuspended in 200 $\mu$L reaction buffer (1$\times$ NEB CutSmart buffer, 0.3 M sucrose). Nuclei were then treated with EcoGII by adding 200 units of EcoGII (NEB) and SAM at 0.6 mM, and incubating at $37\,^{\circ}\mathrm{C}$ for 10 min. The reaction was stopped by adding 0.2\% SDS, and HMW DNA was immediately isolated as previously described.

\subsection*{SMAC-seq analysis}

\subsubsection*{Nanopore sequencing}

HMW DNA was converted into libraries using the Ligation Sequencing Kit 1D (Oxford Nanopore Technologies, SQK-LSK108) following the manufacturer's instructions. Nanopore sequencing was carried out on R9.4 MinION flowcells (Oxford Nanopore Technologies) for up to 48 hours. 

\subsubsection*{Nanopore base calling}

Nanopore events were converted to DNA sequence using Albacore (V2.3.3) using default settings. Reads were resquiggled using Tombo\cite{Stoiber2017}, version 1.3, using the \verb|sacCer3| reference genome. Methylated bases were identified using Tombo in the ``de novo model'' mode. 

\subsubsection*{Aggregation of accessibility information over multibasepair windows}

Even with the addition of m$^6$A methylation, the resolution of SMAC-seq still does not cover every nucleotide in the genome, and it varies substantially between different locations depending on local sequence content differences. In addition to that, nanopore base calling is still far from being a fully resolved problem, and even more so in methylation-aware mode. For these reasons, for many of the analyses described in this study we aimed at assigning aggregate accessibility scores over windows, taking the totality of the available evidence into account, thus obtaining more reliable, if coarser-grained, views of accessibility patterns along the genome. We used a Bayesian approach to carry out aggregation, as follows.

For a given window of width $w$ in the genome, specified by coordinates $c,i,i+w$ (where $c$ denotes the chromosome, and $i$ the leftmost coordinate of the window), and for all reads $r \in R_{c,i,i+w}$ fully spanning the window, we obtain all Tombo probabilities $p_{r,(c,j)}$ such that $j \in [i,i+w)$ for sequence contexts CpG, GpC and A on the corresponding genomic strand. We use a Beta prior $B$($\alpha$,$\beta$), with $\alpha = \beta = 10$, which we then updated based on each probability $p_{r,(c,j)}$ for all $j \in [i,i+w)$. The final binary accessibility score $p_{r,(c,i,i+w)}$ for read $r$ and window $c,i,i+w$ is determined by the final state of the prior. 

\subsubsection*{Read filtering}

As discussed above, we sometimes observe a population of reads that are fully methylated across their whole length or over large segments of it. There reads most likely derive from dead cells, as our initial experiment, which was carried out on a very dense yeast population containing a substantial number of dead cells, exhibited much higher proportion of such reads compared to subsequent experiments using early log-phase cells. In order to remove such potentially artifactual reads, ``filtered'' sets of reads were obtained by removing all reads containing a $\geq$1-kbp stretch that is $\geq$75\% methylated (while also filtering out reads shorter than 1 kb).

\subsubsection*{Read clustering}

For most analyses presented in this manuscript, the \verb|tglkmeans| package was used to cluster SMAC-seq reads (implemented in \verb|R|, \burl{https://bitbucket.org/tanaylab/tglkmeans}. In addition, the hierarchical clustering implementation in \verb|scipy| was also used in certain cases. 

\subsubsection*{Co-accessibility assessment using Normalized Mutual Information}

To evaluate co-accessibility patterns along the genome, we applied a Normalized Mutual Information as follows. Each chromosome in the genome $c$ was split into windows of size $w$. For each such window $(c,i,i+w)$, we identified the maximum range to the right of it, $(c,j,j+w)$ such that the span $(c,i,j+w)$ is covered by $\geq M$ reads. All reads spanning $(c,i,j+w)$ were then extracted and subsampled down to $M$ reads (usually $M = 100$, unless specified otherwise). Accessibility scores were then aggregated and binarized as described above for all windows located in the span $(c,i,j+w)$, and for all $M$ reads fully spanning it, resulting in a local co-accessibility matrix $LCM$ of size $M \times (j+w-i)/w$. We then calculated Normalized Mutual Information scores for each pair of columns $LCM_k$ and $LCM_l$ as follows:

\begin{equation}
\begin{split}
MI(LCM_k,LCM_l) & = p(0,0) \log_2{ \left(\cfrac{p(0,0)}{p_k(0)\,p_l(0)}\right) } \\
                & + p(1,1) \log_2{ \left(\cfrac{p(1,1)}{p_k(1)\,p_l(1)}\right) } \\
                & + p(0,1) \log_2{ \left(\cfrac{p(0,1)}{p_k(0)\,p_l(1)}\right) } \\
                & + p(1,0) \log_2{ \left(\cfrac{p(1,0)}{p_k(1)\,p_l(0)}\right) } \\
\end{split}
\end{equation}

MI scores were then normalized and rescaled in the interval $(-1,1)$:

\end{multicols}
\par\noindent\rule{\dimexpr(0.5\textwidth-0.5\columnsep-0.4pt)}{0.4pt}%
\rule{0.4pt}{6pt}

\begin{equation}
NMI(LCM_k,LCM_l) = \begin{cases}
\cfrac{MI(LCM_k,LCM_l)}{\sqrt{H(LCM_k)H(LCM_l)}} & \text{ for } p(0,0) + p(1,1) \geq 0.5 \\
-\cfrac{MI(LCM_k,LCM_l)}{\sqrt{H(LCM_k)H(LCM_l)}} & \text{ for } p(0,0) + p(1,1) < 0.5
\end{cases}
\end{equation}

\vspace{\belowdisplayskip}\hfill\rule[-6pt]{0.4pt}{6.4pt}%
\rule{\dimexpr(0.5\textwidth-0.5\columnsep-1pt)}{0.4pt}
\begin{multicols}{2}

Where $H$ refers to the entropy of each individual distribution. 

For computational efficiency, local NMI matrices were calculated for even-sized (50kb) evenly spaced (every 10kb) tiles of the genome. The entries of the general genome-wide NMI matrix were then calculated as the average of all local NMI matrices containing each entry.	

\subsubsection*{Testing for coordinated accessibility}

Coordinated accessibility was evaluated as follows. For each pair of locations $(c,i_1,i_1 + r_1)$ and $(c,i_2,i_2 + r_2)$ (usually $r_1 = r_2$), a minimum number of reads $N$ was required that fully spans the $(c,i_1,i_2 + r)$ interval. All such reads were then obtained for each pair, and then subsampled multiple times down to $N$ reads (in order not to introduce bias in coordinated accessibility tests arising due to differential read coverage between locations closer/further apart). For each subsampling, the fraction of accessible regions $p_1$ and $p_2$ was estimated for each of the two locations using the Bayesian procedure described above, as well as the distribution of joint accessibilities over the four states $(0,0)$, $(1,0)$, $(0,1)$, and $(1,1)$. The two halves of the reads were then virtually split in half and recombined for a total of $10^3$ random combinations. The empirical distribution $\mathcal{N}(\mu,\sigma)$ of the four states was then estimated from these random combinations, where $\mu = N*(p_{(0,0)} + p_{(1,1)})$  if $p_{(0,0)} + p_{(1,1)} > 0.5$ and $\mu = N*(p_{(1,0)} + p_{(1,0)})$  if $p_{(0,0)} + p_{(1,1)} \leq 0.5$. Empirical coordinated accessibility $p$-values were then estimated based on the observed counts $|(0,0)| + |(1,1)|$ if $|(0,0)| + |(1,1)| > 0.5*N$ or $|(0,1)| + |(1,0)|$ if $|(0,0)| + |(1,1)| \leq 0.5*N$. Bonferroni correction was applied to account for multiple hypothesis testing.

\subsection*{dSMF and Bisulfite sequencing}

Illumina measurements of CpG and GpC methylation levels were carried out using the PBAT protocol\cite{Miura2012} with modifications. HMW DNA ($\sim$500 ng) was bisulfite converted using the EZ DNA Methylation-Lightning Kit (Zymo, Cat $\#$ D5030) by mixing 20 $\mu$L of purified DNA ($\sim$500 ng) with 130 $\mu$L DNA Methylation Lightning Conversion reagent and incubating at $98\,^{\circ}\mathrm{C}$ for 8 minutes and then at $64\,^{\circ}\mathrm{C}$ for 60 minutes. Bisulfite converted DNA was then cleaned up using the EZ DNA Methylation-Lightning Kit following manufacturer's instructions. 

First strand synthesis was carried out by mixing 20 $\mu$L bisulfite converted DNA, 19.75 $\mu$L H$_2$O, 5 $\mu$L 10$\times$ Blue Buffer (ThermoFisher), 1.25 $\mu$L 10 mM dNTP (NEB), and 4 $\mu$L custom-designed biotinylated adapter. Samples were then incubated at $94\,^{\circ}\mathrm{C}$ for 5 minutes, and at $4\,^{\circ}\mathrm{C}$ for 5 minutes, after which 1.5 $\mu$L Klenow (3$^\prime$ $\rightarrow$ 5$^\prime$ exo minus; MCLab) were added, and the reaction was incubated at $4\,^{\circ}\mathrm{C}$ for 15 minutes, at $37\,^{\circ}\mathrm{C}$ for 90 minutes, and at $70\,^{\circ}\mathrm{C}$ for 5 minutes. First-strand reaction cleanup was carried out using 50 $\mu$L AMPure XP beads (Beckman Coulter); DNA was eluted 50 $\mu$L EB buffer. 

Biotinylated DNA was captured on streptavidin beads. A total of 20 $\mu$L streptavidin Dybaneads M-280 (ThermoFisher) per sample were added to a PCR tube, separated on a magnet and then resuspended in 50 $\mu$L 2$\times$ BW(Li) buffer (6.3 g LiCl, 0.5 mL Tris-HCL pH 8.0, and 0.1 mL 500 mM EDTA for 50 mL total volume), to which the 50 $\mu$L of eluted first-strand reaction DNA was added. Beads were then incubated at room temperature for 30 minutes, washed with 180 $\mu$L 2$\times$ BW(Li) buffer, twice with 0.1 N NaOH (by resuspending well and incubating at room temperature for 2 minutes), washed again with 180 $\mu$L 2$\times$ BW(Li) buffer, then with 180 $\mu$L 10 mM Tris-HCL pH 7.5. 

Second-strand synthesis was carried out by resuspending streptavidin beads in the following reaction mix: 5 $\mu$L 10 $\times$ Blue Buffer, 1.25 $\mu$L 10 mM dNTPs, 39.75 $\mu$L H$_2$O, 4 $\mu$L custom-designed second-strand adapter. Samples were then incubated at $94\,^{\circ}\mathrm{C}$ for 5 minutes, and at $4\,^{\circ}\mathrm{C}$ for 5 minutes, after which 1.5 $\mu$L Klenow (3$^\prime$ $\rightarrow$ 5$^\prime$ exo minus) were added, followed by further incubation at $4\,^{\circ}\mathrm{C}$ for 15 minutes, at $37\,^{\circ}\mathrm{C}$ for 30 minutes, and at $70\,^{\circ}\mathrm{C}$ for 5 minutes. 

Beads were separated on magnet and the chase reaction was carried out by resuspending in a mix of 5 $\mu$L 10$\times$ Thermo Pol Buffer, 1.25 $\mu$L 10 mM dNTPs, 43.5  $\mu$L H$_2$O, and 1  $\mu$L \textit{Bst} DNA Polymerase Large Fragment (NEB). Samples were incubated at $65\,^{\circ}\mathrm{C}$ for 30 minutes, then again separated on magnet.

PCR was performed on beads in 50 $\mu$L reactions composed of 25 $\mu$L 2$\times$ NEB Next PCR Master Mix, 20 $\mu$L H$_2$O, 2.5 $\mu$L i7 and 2.5 $\mu$L i5 primers (both custom-designed), with initial extension at 72$\,^{\circ}\mathrm{C}$ for 3 min, denaturation at 98$\,^{\circ}\mathrm{C}$ for 30 sec, 15 cycles of 98$\,^{\circ}\mathrm{C}$ for 10 sec, 63$\,^{\circ}\mathrm{C}$ for 30 sec, and 72$\,^{\circ}\mathrm{C}$ for 30 sec, and final extension at 72$\,^{\circ}\mathrm{C}$ for 5 min. PCR reactions were cleaned up and size-selected using AMPure XP beads. 

Libraries were sequenced on Illumina NextSeq or MiSeq instruments, as 2$\times$75mers or 2$\times$300mers, respectively. 

\subsection*{dSMF data processing}

Bisulfite reads were trimmed using cutadapt (version 0.16) and Trim Galore (version 0.4.4), using the following settings (taking into account that the bisulfite sequencing libraries are generated with the PBAT protocol): \verb|--clip_R1 9| \verb|--clip_R2 9| \verb|--three_prime_clip_r1 6| \verb|--three_prime_clip_r2 6| \verb|--paired|. Trimmed reads were the mapped to the \verb|sacCer3| version of the yeast genome using Bismark\cite{Bismark2011} (version 0.19.0) with the following settings: \verb|--bowtie2| \verb|--pbat|. Methylation calls were extract using the \verb|bismark_methylation_extractor| program within Bismark and the following settings: \verb|-s| \verb|--no_overlap| \verb|--comprehensive| \verb|--merge_non_CpG| \verb|--cytosine_report| \verb|--CX|. 

\subsection*{ATAC-seq}

ATAC-seq was carried out on the same nuclei isolated for SMAC-seq as described above (before resuspension in M.CviPI Reaction Buffer), by resuspending nuclei with 25 $\mu$L 2$\times$ TD buffer (20 mM Tris-HCl pH 7.6, 10 mM MgCl$_2$, 20\% Dimethyl Formamide), 2.5 $\mu$L transposase (custom produced) and 22.5 $\mu$L nuclease-free H$_2$O, and incubating at $37\,^{\circ}\mathrm{C}$ for 30 min in a Thermomixer at 1000 RPM. Transposed DNA was isolated using the DNA Clean \& Concentrator Kit (Zymo, cat $\#$ D4014) and PCR amplified as described before\cite{Corces2017}. Libraries were then sequenced on a Illumina NextSeq instrument as 2$\times$36mers or  as 2$\times$75mers. 

\subsection*{ATAC-seq data processing}

Demultipexed fastq files were mapped to the \verb|sacCer3| assembly of the \textit{S. cerevisiae} genome as 2$\times$36mers using Bowtie\cite{Bowtie2009} with the following settings: \verb|-v 2| \verb|-k 2| \verb|-m 1| \verb|--best| \verb|--strata|. Duplicate reads were removed using \verb|picard|\verb|-tools| (version 1.99). 

\subsection*{ChIP-seq experiments}

Cell lysis and ChIP reactions were performed as previously described\cite{Hu2015} with minor modifications. Cells were fixed with 1\% formaldehyde for 20 minutes (Rpb1-CTD and Rbp1-CTD-S2P ChIP) or 30 minutes (Hsf1-V5 ChIP) and quenched with 0.125 M glycine for 5 minutes. A total of $\sim$50 ODs of cells were used per Rpb1-CTD or Rpb1-CTD-S2P ChIP and $\sim$300 ODs per Hsf1-V5 ChIP. Fixed cell were washed 2$\times$ in cold 1$\times$ PBS, pelleted and stored at $-80\,^{\circ}\mathrm{C}$. Pellets were lysed in 300 $\mu$L FA lysis buffer (50 mM HEPES--KOH pH 8.0, 150 mM NaCl, 1 mM EDTA, 1\% Triton X-100, 0.1\% sodium deoxycholate, 1 mM PMSF, Roche protease inhibitor) with $\sim$1 mL ceramic beads on a Fastprep-24 (MP Biomedicals). The entire lysate was then collected and adjusted to 1 mL with FA lysis buffer before sonication with a 1/8' microtip on a Q500 sonicator (Qsonica) for 14 minutes (10 seconds on, 20 seconds off). The sample tube was held in a $-20\,^{\circ}\mathrm{C}$ 80\% ethanol bath throughout sonication to prevent sample heating. After sonication, cell debris was pelleted and the supernatant was retained for ChIP. For each ChIP reaction, 30 $\mu$L Protein G Dynabeads (Invitrogen) were blocked (PBS + 0.5\% BSA), prebound with 5-10 $\mu$L antibody (8wG16 Rpb1-CTD, Abcam cat $\#$ ab817); 3E10 Rpb1-CTD-S2P, Milipore cat $\#$ 04-1571-1) or SV5-Pk1 (anti-V5, BioRad cat $\#$ MCA1360G)) and washed 1$\times$ with PBS before incubation with supernatant ($4\,^{\circ}\mathrm{C}$, overnight). Dynabeads were then washed (5 minutes per wash) 3$\times$ in FA lysis buffer, 3$\times$ in high-salt FA lysis buffer (50 mM Hepes-KOH pH 8.0, 500 mM NaCl, 1 mM EDTA, 1\% Triton X-100, 0.1\% sodium deoxycholate, 1 mM PMSF), 1$\times$ in ChIP wash buffer (10 mM Tris-HCl pH 7.5, 0.25 M LiCl, 0.5\% NP-40, 0.5\% sodium deoxycholate, 1 mM EDTA, 1 mM PMSF) and 1$\times$ in TE wash buffer (10 mM Tris-HCl pH 7.5, 1 mM EDTA, 50 mM NaCl). DNA was eluted from the beads in ChIP elution buffer (50 mM Tris-HCl pH 7.5; 10 mM EDTA; 1\% SDS) at $65\,^{\circ}\mathrm{C}$ for 20 min. Eluted DNA was incubated at $65\,^{\circ}\mathrm{C}$ overnight to reverse crosslinks, before treatment with RNAse A ($37\,^{\circ}\mathrm{C}$, 1 hour) and then Proteinase K ($65\,^{\circ}\mathrm{C}$, 2 hours). DNA was purified using the ChIP DNA Clean \& Concentrator kit (Zymo Research). Sequencing libraries were generated using the NEBNext Ultra II DNA Library Prep kit  (NEB Cat $\#$ E7645) and sequenced on a Illumina NextSeq instrument as 2$\times$36mers or as 2$\times$75mers. 

\subsection*{ChIP-seq data processing}

Demultipexed fastq files were mapped to the \verb|sacCer3| assembly of the \textit{S. cerevisiae} genome as 2$\times$36mers using Bowtie\cite{Bowtie2009} with the following settings: \verb|-v 2| \verb|-k 2| \verb|-m 1| \verb|--best| \verb|--strata|. Duplicate reads were removed using \verb|picard|\verb|-tools| (version 1.99). Hsf1 peaks were called using MACS2\cite{Feng2012} (version 2.1.0) with the following settings: \verb|-g 12000000| \verb|-f BAMPE|.

\subsection*{Multiread-preserving alignment and normalization}

Multiread-preserving alignment and track generation was carried out by mapping reads to the \verb|sacCer3| assembly of the \textit{S. cerevisiae} genome using Bowtie\cite{Bowtie2009} with the following settings: \verb|-v 2| \verb|-a| \verb|--best| \verb|--strata|. Each alignment was then given a weight inversely proportional to the number of locations that the read maps to i.e. each position's score was normalized to RPMs as follows:

\begin{equation}\label{multireads}
S_{c,i} = \cfrac{\displaystyle \sum_{R \in R_{c,i}}{\cfrac{1}{NH_R}}}{\cfrac{|R|}{10^6}}
\end{equation}

Where $NH_R$ is the number of locations in the genome a read maps to.

\subsection*{RNA-seq experiments}

Cells (1 mL) were pelleted and flash frozen in liquid N$_2$. Pellets were resuspended in 300 $\mu$L TRIzol and lysed with $\sim$1 mL ceramic beads on a Fastprep-24 (MP Biomedicals). Cell debris were pelleted and RNA was extracted from the supernatant using the Direct-Zol RNA Microprep Kit (Zymo Research). RNA-seq libraries were generated using the NEBNext Ultra Directional RNA Library Prep Kit (NEB Cat $\#$ E7420)

\subsection*{RNA-seq processing and gene expression quantification}

RNA-seq reads were mapped to the yeast genome as 1$\times$50mers (external datasets) or 2$\times$75mers (diamide experiments) using TopHat version 2.0.8\cite{Trapnell2012}. Gene-level  quantifications in FPKMs (Fragments Per Kilobase per Million mapped reads) were generated using Cufflinks version 2.0.2\cite{Trapnell2012}. The mean from all replicates was taken as the expression level for each gene for subsequent analyses. 

\subsection*{External sequencing datasets}

A number of previously published \textit{S. cerevisiae} genomics datasets were used in this study. ChIP-exo reads and called peaks for Abf1, Cbf1, Rap1 and Reb1 were downloaded from GEO accessions GSE93662 and GSE72106. ChIP-seq data for centromeric proteins was downloaded from GEO accessions GSE31466 and GSE51949. PRO-seq and PRO-CAP data was obtained from GEO accession GSE76142. ORC ChIP-seq data was downloaded from GEO accession GSE16926. DNase-seq was downloaded from GEO accession GSE69651 while DGF data was downloaded from SRA accession SRP000620. MNase-seq data was obtained from GEO accessions GSE26493 and GSE29292, TBP ChIP-seq from GSE44200, Rpb1 ChIP-seq from GSE93190, Rpb3 ChIP-seq from GSE74787, RPC128 ChIP-seq from GSE39566, and Mediator subunits ChIP-seq from GSE95051. RNA-seq data from accession GSE85590 was also used. Except where otherwise stated, raw reads were aligned using Bowtie\cite{Langmead2009} with the following settings: \verb|-v 2| \verb|--k 2| \verb|-m 1| \verb|--best| \verb|--strata|, with the addition of \verb|-X 1000| for paired-end reads. Paired-end reads were aligned as 2$\times$25mers), while single-end reads were aligned as 1$\times$36mers. PRO-seq and PRO-CAP data was aligned as 1$\times$16mers. 

\subsection*{Micro-C data and processing}

Micro-C data was downloaded from GEO accession GSE68016 and processed as described in the original publication\cite{Hsieh2015}.

\subsection*{Transcription factor motif mapping}

Transcription factor motif recognition sequences were mapped genome-wide using FIMO\cite{Grant2011} (version 4.11.2 of the MEME-Suite\cite{Bailey2009} using the CIS-BP database\cite{Weirauch2014} as a reference set of position weight matrices. 

\subsection*{Gene annotation update}

Publicly available gene models for \textit{S. cerevisiae} do not contain TSS and TTS information for a major fraction of genes in the genome, only including the coding (``CDS'') portions instead. As the omission of UTRs presents a problem for TSS- and TTS-centered analyses, we updated the existing gene models following the approach described previously\cite{Schep2015} and the \textit{S. cerevisiae} TIF-seq dataset from GEO accession GSE39128\cite{Pelechano2013}. New TSS and TTS positions were assigned to each gene for which such information was available based on the median UTR length as measured by TIF-seq.

\subsection*{Nucleosome positioning information}

H4S47C\cite{Brogaard2012,Ramachandran2015} chemical mapping data was downloaded form GEO accessions GSE59523 and GSE36063. H3Q85C\cite{Chereji2018} chemical mapping data was downloaded from GEO accession GSE97290. We used the nucleosome positioning calls obtained from the original 2012 Brogaard et al. study for our analyses, after transforming them from coordinates in the \verb|sacCer2| version of the \textit{S. cerevisiae} genome assembly to \verb|sacCer3| using the \verb|liftOver| function in the UCSC Genome Browser utilities toolkit.

\subsection*{Mappability tracks generation}

To evaluate unique read mappability, the whole genome was tiled with reads of given length at every position. The reads were then mapped back to the genome using the same settings used to map single-end ChIP-seq reads. For every position coverage by mapped reads was calculated, and mappability was scored as the ratio between read coverage and the read length used to tile the genome.

\subsection*{Data availability}

Short read datasets associated with this study are available through GEO accession GSE128290. 

\section*{Author contributions}

Z.S, G.K.M and N.A.S.A. conceived and designed the study. Z.S, G.K.M and N.A.S.A. performed initial experiments. Z.S., M.P.S. and G.K.M. performed diamide time course experiments. G.K.M. and Z.S. analyzed data. W.J.G. and A.K. supervised the study. G.K.M., Z.S., W.J.G. and A.K. wrote the manuscript with input from all authors. 

\section*{Acknowledgments}

This work was supported by NIH grants (P50HG007735, RO1 HG008140, U19AI057266 and UM1HG009442 to W.J.G., 1UM1HG009436 to W.J.G. and A.K., 1DP2OD022870-01 and 1U01HG009431 to A.K.), the Rita Allen Foundation (to W.J.G.), the Baxter Foundation Faculty Scholar Grant, and the Human Frontiers Science Program grant RGY006S (to W.J.G). W.J.G is a Chan Zuckerberg Biohub investigator and acknowledges grants 2017-174468 and 2018-182817 from the Chan Zuckerberg Initiative. Z.S. is supported by EMBO Long-Term Fellowship EMBO ALTF 1119-2016 and by Human Frontier Science Program Long-Term Fellowship HFSP LT 000835/2017-L G.K.M. is supported by the Stanford School of Medicine Dean's Fellowship. N.A.S.A. is funded by the Department of Defense through a National Defense Science and Engineering Grant and by a Stanford Graduate Fellowship. The authors would also like to thank members of the Greenleaf and Kundaje labs for their helpful suggestions and discussions on the subject over the course of the study.

\begin{thebibliography}{100}

\bibitem{Wu1980}Wu C. 1980. The 5$^{\prime}$ ends of \textit{Drosophila} heat shock genes in chromatin are hypersensitive to DNase I. \textit{Nature} \textbf{286}(5776):854--860.

\bibitem{Keen1981}Keene MA, Corces V, Lowenhaupt K, Elgin SC. 1981. DNase I hypersensitive sites in Drosophila chromatin occur at the 5$^{\prime}$ ends of regions of transcription. \textit{Proc Natl Acad Sci U S A} \textbf{78}(1):143--146.

\bibitem{McGhee1981}McGhee JD, Wood WI, Dolan M, Engel JD, Felsenfeld G. 1981. A 200 base pair region at the 5$^{\prime}$ end of the chicken adult $\beta$-globin gene is accessible to nuclease digestion. \textit{Cell} \textbf{27}(1 Pt 2):45--55.

\bibitem{Dorschner2004}Dorschner MO, Hawrylycz M, Humbert R, Wallace JC, Shafer A, Kawamoto J, Mack J, Hall R, Goldy J, Sabo PJ, Kohli A, Li Q, McArthur M, Stamatoyannopoulos JA. 2004. High-throughput localization of functional elements by quantitative chromatin profiling. \textit{Nat Methods} \textbf{1}(3):219--225.

\bibitem{Sabo2006}Sabo PJ, Kuehn MS, Thurman R, Johnson BE, Johnson EM, Cao H, Yu M, Rosenzweig E, Goldy J, Haydock A, Weaver M, Shafer A, Lee K, Neri F, Humbert R, Singer MA, Richmond TA, Dorschner MO, McArthur M, Hawrylycz M, Green RD, Navas PA, Noble WS, Stamatoyannopoulos JA. 2006. Genome-scale mapping of DNase I sensitivity in vivo using tiling DNA microarrays. \textit{Nat Methods} \textbf{3}(7):511--518.

\bibitem{Boyle2008}Boyle AP, Davis S, Shulha HP, Meltzer P, Margulies EH, Weng Z, Furey TS, Crawford GE. 2008. High-resolution mapping and characterization of open chromatin across the genome. \textit{Cell} \textbf{132}(2):311--322. % doi: 10.1016/j.cell.2007.12.014.

\bibitem{Hesselberth2009}Hesselberth JR, Chen X, Zhang Z, Sabo PJ, Sandstrom R, Reynolds AP, Thurman RE, Neph S, Kuehn MS, Noble WS, Fields S, Stamatoyannopoulos JA. 2009. Global mapping of protein-DNA interactions in vivo by digital genomic footprinting. \textit{Nat Methods} \textbf{6}(4):283--289. % doi: 10.1038/nmeth.1313. 

\bibitem{Buenrostro2013}Buenrostro JD, Giresi PG, Zaba LC, Chang HY, Greenleaf WJ. 2013. Transposition of native chromatin for fast and sensitive epigenomic profiling of open chromatin, DNA-binding proteins and nucleosome position. \textit{Nat Methods} \textbf{10}(12):1213--1218. % doi: 10.1038/nmeth.2688. 

\bibitem{Schones2008}Schones DE, Cui K, Cuddapah S, Roh TY, Barski A, Wang Z, Wei G, Zhao K. 2008. Dynamic regulation of nucleosome positioning in the human genome. \textit{Cell} \textbf{132}(5):887--898. % doi: 10.1016/j.cell.2008.02.022.

\bibitem{Lieberman2009}Lieberman-Aiden E, van Berkum NL, Williams L, Imakaev M, Ragoczy T, Telling A, Amit I, Lajoie BR, Sabo PJ, Dorschner MO, Sandstrom R, Bernstein B, Bender MA, Groudine M, Gnirke A, Stamatoyannopoulos J, Mirny LA, Lander ES, Dekker J. 2009. Comprehensive mapping of long-range interactions reveals folding principles of the human genome. \textit{Science} \textbf{326}(5950):289--293. % doi: 10.1126/science.1181369.

\bibitem{Fullwood2009}Fullwood MJ, Liu MH, Pan YF, Liu J, Xu H, Mohamed YB, Orlov YL, Velkov S, Ho A, Mei PH, Chew EG, Huang PY, Welboren WJ, Han Y, Ooi HS, Ariyaratne PN, Vega VB, Luo Y, Tan PY, Choy PY, Wansa KD, Zhao B, Lim KS, Leow SC, Yow JS, Joseph R, Li H, Desai KV, Thomsen JS, Lee YK, Karuturi RK, Herve T, Bourque G, Stunnenberg HG, Ruan X, Cacheux-Rataboul V, Sung WK, Liu ET, Wei CL, Cheung E, Ruan Y. 2009. An oestrogen-receptor-alpha-bound human chromatin interactome. \textit{Nature} \textbf{462}(7269):58--64.

\bibitem{Mumbach2016}Mumbach MR, Rubin AJ, Flynn RA, Dai C, Khavari PA, Greenleaf WJ, Chang HY. 2016. HiChIP: efficient and sensitive analysis of protein-directed genome architecture. \textit{Nat Methods} \textbf{13}(11):919--922. % doi: 10.1038/nmeth.3999. 

\bibitem{Risca2017}Risca VI, Denny SK, Straight AF, Greenleaf WJ. 2017. Variable chromatin structure revealed by in situ spatially correlated DNA cleavage mapping. \textit{Nature} \textbf{541}(7636):237--241. % doi: 10.1038/nature20781. 

\bibitem{Boettiger2016}Boettiger AN, Bintu B, Moffitt JR, Wang S, Beliveau BJ, Fudenberg G, Imakaev M, Mirny LA, Wu CT, Zhuang X. 2016. Super-resolution imaging reveals distinct chromatin folding for different epigenetic states. \textit{Nature} \textbf{529}(7586):418--422. % doi: 10.1038/nature16496. 

\bibitem{Ou2017}Ou HD, Phan S, Deerinck TJ, Thor A, Ellisman MH, O'Shea CC. 2017. ChromEMT: Visualizing 3D chromatin structure and compaction in interphase and mitotic cells. \textit{Science} \textbf{357}(6349). % pii: eaag0025. doi: 10.1126/science.aag0025.

\bibitem{Kelly2012}Kelly TK, Liu Y, Lay FD, Liang G, Berman BP, Jones PA. 2012. Genome-wide mapping of nucleosome positioning and DNA methylation within individual DNA molecules. \textit{Genome Res} \textbf{22}(12):2497--2506. % doi: 10.1101/gr.143008.112. % NOME-seq

\bibitem{Nabilsi2014}Nabilsi NH, Deleyrolle LP, Darst RP, Riva A, Reynolds BA, Kladde MP. 2014. Multiplex mapping of chromatin accessibility and DNA methylation within targeted single molecules identifies epigenetic heterogeneity in neural stem cells and glioblastoma. \textit{Genome Res} \textbf{24}(2):329--339. % doi: 10.1101/gr.161737.113. %  MAPit-patch

\bibitem{Krebs2017}Krebs AR, Imanci D, Hoerner L, Gaidatzis D, Burger L, Sch\"ubeler D. 2017. Genome-wide Single-Molecule Footprinting Reveals High RNA Polymerase II Turnover at Paused Promoters. \textit{Mol Cell} \textbf{67}(3):411--422.e4. % doi: 10.1016/j.molcel.2017.06.027. 

\bibitem{Murray2018}Murray IA, Morgan RD, Luyten Y, Fomenkov A, Corr\^ea IR Jr, Dai N, Allaw MB, Zhang X, Cheng X, Roberts RJ. 2018. The non-specific adenine DNA methyltransferase M.EcoGII. \textit{Nucleic Acids Res} \textbf{46}(2):840--848. % doi: 10.1093/nar/gkx1191.

\bibitem{Simpson2017}Simpson JT, Workman RE, Zuzarte PC, David M, Dursi LJ, Timp W. 2017. Detecting DNA cytosine methylation using nanopore sequencing. \textit{Nat Methods} \textbf{14}(4):407--410. %  doi: 10.1038/nmeth.4184. 

\bibitem{Rand2017}Rand AC, Jain M, Eizenga JM, Musselman-Brown A, Olsen HE, Akeson M, Paten B. 2017. Mapping DNA methylation with high-throughput nanopore sequencing. \textit{Nat Methods} \textbf{14}(4):411--413. % doi: 10.1038/nmeth.4189. 

\bibitem{NanoMNAse2018}Baldi S, Krebs S, Blum H, Becker PB. 2018. Genome-wide measurement of local nucleosome array regularity and spacing by nanopore sequencing.  \textit{Nat Struct Mol Biol} \textbf{25}(9):894--901. % doi: 10.1038/s41594-018-0110-0. 

\bibitem{Feng2010}Feng S, Cokus SJ, Zhang X, Chen PY, Bostick M, Goll MG, Hetzel J, Jain J, Strauss SH, Halpern ME, Ukomadu C, Sadler KC, Pradhan S, Pellegrini M, Jacobsen SE. 2010. Conservation and divergence of methylation patterning in plants and animals. \textit{Proc Natl Acad Sci U S A} \textbf{107}(19):8689--8694. % doi: 10.1073/pnas.1002720107.

\bibitem{Zemach2010}Zemach A, McDaniel IE, Silva P, Zilberman D. 2010. Genome-wide evolutionary analysis of eukaryotic DNA methylation.  \textit{Science} \textbf{328}(5980):916--919. % doi: 10.1126/science.1186366.

\bibitem{Lister2008}Lister R, O'Malley RC, Tonti-Filippini J, Gregory BD, Berry CC, Millar AH, Ecker JR. 2008. Highly integrated single-base resolution maps of the epigenome in \textit{Arabidopsis}. \textit{Cell} \textbf{133}(3):523--536. % doi: 10.1016/j.cell.2008.03.029.

\bibitem{Brogaard2012}Brogaard K, Xi L, Wang JP, Widom J. 2012. A map of nucleosome positions in yeast at base-pair resolution. \textit{Nature} \textbf{486}(7404):496--501. % doi: 10.1038/nature11142. 

\bibitem{Stoiber2017}Stoiber MH, Quick J, Egan R, Lee JE, Celniker SE, Neely R, Loman N, Pennacchio L, Brown JB. 2017. De novo Identification of DNA Modifications Enabled by Genome-Guided Nanopore Signal Processing. \textit{bioRxiv} 094672 % doi: https://doi.org/10.1101/094672

\bibitem{Li2016}Li H. 2016. Minimap and miniasm: fast mapping and de novo assembly for noisy long sequences. \textit{Bioinformatics} \textbf{32}(14):2103--2110. % doi: 10.1093/bioinformatics/btw152. 

\bibitem{Yuan2005}Yuan GC, Liu YJ, Dion MF, Slack MD, Wu LF, Altschuler SJ, Rando OJ. 2005. Genome-scale identification of nucleosome positions in \textit{S. cerevisiae}. \textit{Science} \textbf{309}(5734):626--630. 

\bibitem{Cole2011}Cole HA, Howard BH, Clark DJ. 2011. The centromeric nucleosome of budding yeast is perfectly positioned and covers the entire centromere. \textit{Proc Natl Acad Sci U S A} \textbf{108}(31):12687--12692. % doi: 10.1073/pnas.1104978108. 

\bibitem{Henikoff2014}Henikoff S, Ramachandran S, Krassovsky K, Bryson TD, Codomo CA, Brogaard K, Widom J, Wang JP, Henikoff JG. 2014. The budding yeast Centromere DNA Element II wraps a stable Cse4 hemisome in either orientation in vivo. \textit{Elife} \textbf{3}:e01861. % doi: 10.7554/eLife.01861.

\bibitem{Merz2008}Merz K, Hondele M, Goetze H, Gmelch K, Stoeckl U, Griesenbeck J. 2008. Actively transcribed rRNA genes in \textit{S. cerevisiae} are organized in a specialized chromatin associated with the high-mobility group protein Hmo1 and are largely devoid of histone molecules. \textit{Genes Dev} \textbf{22}(9):1190--1204. % doi: 10.1101/gad.466908.

\bibitem{Conconi1989}Conconi A, Widmer RM, Koller T, Sogo JM. 1989. Two different chromatin structures coexist in ribosomal RNA genes throughout the cell cycle. \textit{Cell} \textbf{57}(5):753--761.

\bibitem{French2003}French SL, Osheim YN, Cioci F, Nomura M, Beyer AL. 2003. In exponentially growing \textit{Saccharomyces cerevisiae} cells, rRNA synthesis is determined by the summed RNA polymerase I loading rate rather than by the number of active genes. \textit{Mol Cell Biol} \textbf{23}(5):1558--1568.

\bibitem{Goetze2010}Goetze H, Wittner M, Hamperl S, Hondele M, Merz K, Stoeckl U, Griesenbeck J. 2010. Alternative chromatin structures of the 35S rRNA genes in \textit{Saccharomyces cerevisiae} provide a molecular basis for the selective recruitment of RNA polymerases I and II. \textit{Mol Cell Biol} \textbf{30}(8):2028--2045. % doi: 10.1128/MCB.01512-09. 

\bibitem{Panday2016}Panday A, Grove A. 2016. Yeast HMO1: Linker Histone Reinvented. \textit{Microbiol Mol Biol Rev} \textbf{81}(1). pii: e00037--16.

\bibitem{Jones2007}Jones HS, Kawauchi J, Braglia P, Alen CM, Kent NA, Proudfoot NJ. 2007. RNA polymerase I in yeast transcribes dynamic nucleosomal rDNA. \textit{Nat Struct Mol Biol} \textbf{14}(2):123--130. 

\bibitem{Huang2003}Huang J, Moazed D. 2003. Association of the RENT complex with nontranscribed and coding regions of rDNA and a regional requirement for the replication fork block protein Fob1 in rDNA silencing. \textit{Genes Dev} \textbf{17}(17):2162--2176. 

\bibitem{Zhu2018}Zhu F, Farnung L, Kaasinen E, Sahu B, Yin Y, Wei B, Dodonova SO, Nitta KR, Morgunova E, Taipale M, Cramer P, Taipale J. 2018. The interaction landscape between transcription factors and the nucleosome. \textit{Nature} \textbf{562}(7725):76--81. % doi: 10.1038/s41586-018-0549-5. 

\bibitem{OSullivan2004}O'Sullivan JM, Tan-Wong SM, Morillon A, Lee B, Coles J, Mellor J, Proudfoot NJ. 2004. Gene loops juxtapose promoters and terminators in yeast. \textit{Nat Genet} \textbf{36}(9):1014--1018.

\bibitem{Tan2012}Tan-Wong SM, Zaugg JB, Camblong J, Xu Z, Zhang DW, Mischo HE, Ansari AZ, Luscombe NM, Steinmetz LM, Proudfoot NJ. 2012. Gene loops enhance transcriptional directionality. \textit{Science} \textbf{338}(6107):671--675. % doi: 10.1126/science.1224350. 

\bibitem{Hsieh2015}Hsieh TH, Weiner A, Lajoie B, Dekker J, Friedman N, Rando OJ. 2015. Mapping Nucleosome Resolution Chromosome Folding in Yeast by Micro-C. \textit{Cell} \textbf{162}(1):108--119. % doi: 10.1016/j.cell.2015.05.048. 

\bibitem{Weiner2015}Weiner A, Hsieh TH, Appleboim A, Chen HV, Rahat A, Amit I, Rando OJ, Friedman N. 2015. High-resolution chromatin dynamics during a yeast stress response. \textit{Mol Cell} \textbf{58}(2):371--386. % doi: 10.1016/j.molcel.2015.02.002.

\bibitem{Morano2012}Morano KA, Grant CM, Moye-Rowley WS. 2012. The response to heat shock and oxidative stress in \textit{Saccharomyces cerevisiae}. \textit{Genetics} \textbf{190}(4):1157--1195. % doi: 10.1534/genetics.111.128033. 

\bibitem{Gabrieli2018}Gabrieli T, Sharim H, Fridman D, Arbib N, Michaeli Y, Ebenstein Y. 2018. Selective nanopore sequencing of human BRCA1 by Cas9-assisted targeting of chromosome segments (CATCH). \textit{Nucleic Acids Res} \textbf{46}(14):e87. % doi: 10.1093/nar/gky411.

\bibitem{TOPSeq}Sta\v{s}evskij Z, Gibas P, Gordevi\v{c}ius J, Kriukien\.e E, Klima\v{s}auskas S. 2017. Tethered Oligonucleotide-Primed Sequencing, TOP-Seq: A High-Resolution Economical Approach for DNA Epigenome Profiling. \textit{Mol Cell} \textbf{65}(3):554--564.e6. % doi: 10.1016/j.molcel.2016.12.012. 

\bibitem{Fu2015}Fu Y, Luo GZ, Chen K, Deng X, Yu M, Han D, Hao Z, Liu J, Lu X, Dor\'e LC, Weng X, Ji Q, Mets L, He C. 2015. N$^6$-methyldeoxyadenosine marks active transcription start sites in \textit{Chlamydomonas}. \textit{Cell} \textbf{161}(4):879--892. % doi: 10.1016/j.cell.2015.04.010. 

\bibitem{Wang2017}Wang Y, Chen X, Sheng Y, Liu Y, Gao S. 2017. N$^6$-adenine DNA methylation is associated with the linker DNA of H2A.Z-containing well-positioned nucleosomes in Pol II-transcribed genes in \textit{Tetrahymena}. \textit{Nucleic Acids Res} \textbf{45}(20):11594--11606. % doi: 10.1093/nar/gkx883.

\bibitem{Luo2018}Luo GZ, Hao Z, Luo L, Shen M, Sparvoli D, Zheng Y, Zhang Z, Weng X, Chen K, Cui Q, Turkewitz AP, He C. 2018. N$^6$-methyldeoxyadenosine directs nucleosome positioning in \textit{Tetrahymena} DNA. \textit{Genome Biol} \textbf{19}(1):200. % doi: 10.1186/s13059-018-1573-3.

\bibitem{Timinskas1995}Timinskas A, Butkus V, Janulaitis A. 1995. Sequence motifs characteristic for DNA [cytosine-N4] and DNA [adenine-N6] methyltransferases. Classification of all DNA methyltransferases. \textit{Gene} \textbf{157}(1--2):3--11.

\bibitem{Salter2016}Salter JD, Bennett RP, Smith HC. 2016. The APOBEC Protein Family: United by Structure, Divergent in Function. \textit{Trends Biochem Sci} \textbf{41}(7):578--594. % doi: 10.1016/j.tibs.2016.05.001. 

\bibitem{Kawasaki2017}Kawasaki F, Beraldi D, Hardisty RE, McInroy GR, van Delft P, Balasubramanian S. 2017. Genome-wide mapping of 5-hydroxymethyluracil in the eukaryote parasite \textit{Leishmania}. \textit{Genome Biol} \textbf{18}(1):23. % doi: 10.1186/s13059-017-1150-1.

\bibitem{Schep2015}Schep AN, Buenrostro JD, Denny SK, Schwartz K, Sherlock G, Greenleaf WJ. 2015. Structured nucleosome fingerprints enable high-resolution mapping of chromatin architecture within regulatory regions. \textit{Genome Res} \textbf{25}(11):1757--1770. % doi: 10.1101/gr.192294.115.

\bibitem{Miura2012}Miura F, Enomoto Y, Dairiki R, Ito T. 2012. Amplification-free whole-genome bisulfite sequencing by post-bisulfite adaptor tagging. \textit{Nucleic Acids Res} \textbf{40}(17):e136. 

\bibitem{Bismark2011}Krueger F, Andrews SR. 2011. Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications. \textit{Bioinformatics} \textbf{27}(11):1571--1572. % doi: 10.1093/bioinformatics/btr167. 

\bibitem{Corces2017}Corces MR, Trevino AE, Hamilton EG, Greenside PG, Sinnott-Armstrong NA, Vesuna S, Satpathy AT, Rubin AJ, Montine KS, Wu B, Kathiria A, Cho SW, Mumbach MR, Carter AC, Kasowski M, Orloff LA, Risca VI, Kundaje A, Khavari PA, Montine TJ, Greenleaf WJ, Chang HY. 2017. An improved ATAC-seq protocol reduces background and enables interrogation of frozen tissues. \textit{Nat Methods} \textbf{14}(10):959--962. % doi: 10.1038/nmeth.4396. 

\bibitem{Bowtie2009}Langmead B, Trapnell C, Pop M, Salzberg SL. 2009. Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. \textit{Genome Biol} \textbf{10}(3):R25. % doi: 10.1186/gb-2009-10-3-r25. 

\bibitem{Hu2015}Hu B, Petela N, Kurze A, Chan KL, Chapard C, Nasmyth K. 2015. Biological chromodynamics: a general method for measuring protein occupancy across the genome by calibrating ChIP-seq. \textit{Nucleic Acids Res} \textbf{43}(20):e132. % doi: 10.1093/nar/gkv670.

\bibitem{Feng2012}Feng J, Liu T, Qin B, Zhang Y, Liu XS. 2012. Identifying ChIP-seq enrichment using MACS. \textit{Nat Protoc} \textbf{7}(9):1728--1740.

\bibitem{Trapnell2012}Trapnell C, Roberts A, Goff L, Pertea G, Kim D, Kelley DR, Pimentel H, Salzberg SL, Rinn JL, Pachter L. 2012. Differential gene and transcript expression analysis of RNA-seq experiments with TopHat and Cufflinks. \textit{Nat Protoc} \textbf{7}(3):562--578.

\bibitem{Langmead2009}Langmead B, Trapnell C, Pop M, Salzberg SL. 2009. Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. \textit{Genome Biol} \textbf{10}(3):R25. % doi: 10.1186/gb-2009-10-3-r25. 

\bibitem{Grant2011}Grant CE, Bailey TL, Noble WS. 2011. FIMO: scanning for occurrences of a given motif. \textit{Bioinformatics} \textbf{27}(7):1017--1018. % doi: 10.1093/bioinformatics/btr064.

\bibitem{Bailey2009}Bailey TL, Boden M, Buske FA, Frith M, Grant CE, Clementi L, Ren J, Li WW, Noble WS. 2009. MEME SUITE: tools for motif discovery and searching. \textit{Nucleic Acids Res} \textbf{37}(Web Server issue):W202--208. % doi: 10.1093/nar/gkp335.

\bibitem{Weirauch2014}Weirauch MT, Yang A, Albu M, Cote AG, Montenegro-Montero A, Drewe P, Najafabadi HS, Lambert SA, Mann I, Cook K, Zheng H, Goity A, van Bakel H, Lozano JC, Galli M, Lewsey MG, Huang E, Mukherjee T, Chen X, Reece-Hoyes JS, Govindarajan S, Shaulsky G, Walhout AJ, Bouget FY, Ratsch G, Larrondo LF, Ecker JR, Hughes TR. 2014. Determination and inference of eukaryotic transcription factor sequence specificity. \textit{Cell} \textbf{158}(6):1431--1443. % doi: 10.1016/j.cell.2014.08.009.

\bibitem{Pelechano2013}Pelechano V, Wei W, Steinmetz LM. 2013. Extensive transcriptional heterogeneity revealed by isoform profiling. \textit{Nature} \textbf{497}(7447):127--131. % doi: 10.1038/nature12121. 

\bibitem{Ramachandran2015}Ramachandran S, Zentner GE, Henikoff S. 2015. Asymmetric nucleosomes flank promoters in the budding yeast genome. \textit{Genome Res} \textbf{25}(3):381--390. % doi: 10.1101/gr.182618.114. 

\bibitem{Chereji2018}Chereji RV, Ramachandran S, Bryson TD, Henikoff S. 2018. Precise genome-wide mapping of single nucleosomes and linkers in vivo. \textit{Genome Biol} \textbf{19}(1):19. % doi: 10.1186/s13059-018-1398-0.

\bibitem{Liang2018}Liang Z, Shen L, Cui X, Bao S, Geng Y, Yu G, Liang F, Xie S, Lu T, Gu X, Yu H. 2018. DNA N$^6$-Adenine Methylation in \textit{Arabidopsis thaliana}. \textit{Dev Cell} \textbf{45}(3):406--416.e3. % doi: 10.1016/j.devcel.2018.03.012. 

\bibitem{Greer2015}Greer EL, Blanco MA, Gu L, Sendinc E, Liu J, Aristiz\'abal-Corrales D, Hsu CH, Aravind L, He C, Shi Y. 2015. DNA Methylation on N6-Adenine in \textit{C. elegans}. \textit{Cell} \textbf{161}(4):868--878. % doi: 10.1016/j.cell.2015.04.005. 

\bibitem{Fu2008}Fu Y, Sinha M, Peterson CL, Weng Z. 2008. The insulator binding protein CTCF positions 20 nucleosomes around its binding sites across the human genome. \textit{PLoS Genet} \textbf{4}(7):e1000138. % doi: 10.1371/journal.pgen.1000138.

\end{thebibliography}

\end{multicols}

\end{document}

