\documentclass[10pt]{article}
\usepackage[hmargin=1.5cm,top=2cm,bottom=2cm]{geometry}
\usepackage{multicol}
\setlength\columnsep{15pt}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{array}
\usepackage{booktabs}
\usepackage{tabularx}
\usepackage[auth-sc]{authblk}
\usepackage{longtable}
\usepackage{multirow}
\usepackage{hyperref}
\usepackage{enumerate}
\usepackage[labelfont=bf]{caption}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage{mdframed}
\usepackage{graphics}
\usepackage{multirow}
\usepackage{rotating}
\usepackage{array}
\usepackage{lscape}
\usepackage{caption}
\usepackage{breakurl}
\usepackage{todonotes}
\usepackage{hanging}
\usepackage[final]{pdfpages}
\usepackage[leftFloats,CaptionAfterwards]{fltpage}
\usepackage[numbers,super,sort&compress]{natbib}
\setlength{\bibsep}{0pt plus 0.3ex}
\usepackage{abstract}
\usepackage{enumitem}
\usepackage{soul}
\usepackage{titlesec}
\titleformat{\section}[block]{\large\bfseries\filcenter}{\thesection.}{0.4em}{}
\titleformat{\subsection}[block]{\normalsize\sc\bfseries\filcenter}{\thesubsection.}{0.4em}{}
\titleformat{\subsubsection}[block]{\normalsize\sc\itshape\filright}{\thesubsection.}{0.4em}{}
\setcounter{secnumdepth}{5}

\makeatletter
\def\@biblabel#1{\@ifnotempty{#1}{#1.}}
\makeatother

\newcommand{\filllastline}[1]{
\setlength\leftskip{0pt}
\setlength\rightskip{0pt}
\setlength\parfillskip{0pt}
#1}

\newenvironment{Figure}
{\par\medskip\noindent\minipage{\linewidth}}
{\endminipage\par\medskip}

\title{\bf The chromatin landscape of the euryarchaeon \textit{Haloferax volcanii}}
\renewcommand\Authfont{\scshape\normalsize}
\author[1,$\#$]{Georgi K. Marinov}
\author[1]{S. Tansu Bagdatli}
\author[2]{Tong Wu}
\author[2,3,4]{Chuan He}
\author[1,5]{Anshul Kundaje}
\author[1,6,7,8]{William J. Greenleaf}
\renewcommand\Affilfont{\itshape\small}
\affil[1]{Department of Genetics, Stanford University, Stanford, California 94305, USA}
\affil[2]{Department of Chemistry and Institute for Biophysical Dynamics, The University of Chicago, Chicago, IL, 60637, USA}
\affil[3]{Department of Biochemistry and Molecular Biology and Institute for Biophysical Dynamics, The University of Chicago, Chicago, IL, 60637, USA}
\affil[4]{Howard Hughes Medical Institute, The University of Chicago, Chicago, IL, 60637, USA}
\affil[5]{Department of Computer Science, Stanford University, Stanford, California 94305, USA}
\affil[6]{Center for Personal Dynamic Regulomes, Stanford University, Stanford, California 94305, USA}
\affil[7]{Department of Applied Physics, Stanford University, Stanford, California 94305, USA}
\affil[8]{Chan Zuckerberg Biohub, San Francisco, California, USA}
\affil[$\#$]{Corresponding author}
\date{}

\begin{document}
\maketitle

% \centerline{}
% \centerline{}
\begin{abstract}

\noindent {\normalsize \textbf{Archaea, together with Bacteria, represent the two main divisions of life on Earth, with many of the defining characteristics of the more complex eukaryotes tracing their origin to evolutionary innovations first made in their archaeal ancestors. One of the most notable such features is nucleosomal chromatin, although archaeal histones and chromatin differ significantly from those of eukaryotes. Despite increased interest in archaeal histones in recent years, the properties of archaeal chromatin have been little studied using genomic tools. Here, we adapt the ATAC-seq assay to archaea and use it to map the accessible landscape of the genome of the euryarchaeote \textit{Haloferax volcanii}. We integrate the resulting datasets with genome-wide maps of active transcription and single-stranded DNA (ssDNA) and find that while \textit{H. volcanii} promoters exist in a preferentially accessible state, modulation of transcriptional activity is not associated with changes in promoter accessibility, unlike the typical situation in eukaryotes. Applying orthogonal single-molecule footprinting methods, we quantify the absolute levels of physical protection of \textit{H. volcanii}, and find that archaeal nucleosomal chromatin is similarly or only slightly more accessible, in aggregate, than that of eukaryotes. We also evaluate the degree of coordination of transcription within archaeal operons and make the unexpected observation that some CRISPR arrays are associated with highly prevalent ssDNA structures. These results provide a foundation for the future functional studies of archaeal chromatin.
} 
}
\centerline{}
\centerline{}
\end{abstract}

\begin{multicols}{2}

\section*{Introduction}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig1V2.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Archaeal ATAC-seq and the open chromatin landscape of \textit{H. volcanii}}. 
(A and B) Adaptation and optimization of the ATAC-seq assay to the archaeal context. (A) Distribution of TSS ratio scores (see Methods for details) for native, 0.1\%- and 1\%-formaldehyde ATAC-seq libraries. 
(C and D) Representative browser snapshots of ATAC-seq profiles along the \textit{H. volcanii} genome.
(E) Distribution of MACS2 ATAC-seq peaks relative to TSSs.
(F) High reproducibility of \textit{H. volcanii} chromatin accessibility measurements using ATAC-seq. Shown is the between-replicate correlation over TSSs in RPM (Read Per Million) units.
(G) Fragment length distribution in \textit{H. volcanii} ATAC-seq datasets.
(H) Estimated relative copy number of \textit{H. volcanii} chromosomes. Genomic DNA was tagmented and amplified ($n = 4$) and normalized read coverage was estimated for each chromosome/plasmid. The average ratios are shown.
(I) Global ATAC-seq profile over each of the five \textit{H. volcanii} chromosomes. The number in brackets corresponds to the magnification of the true proportional size of plasmids relative to the main chromosome.
}
\label{Fig1}
\end{figure*}

Life on earth is now understood to be divided into two deep fundamental clades -- Archaea and Bacteria. Archaea were only discovered as a separate branch of the tree of life in the 1970s\cite{Woese1977}, yet it was noticed very early on that they share a number of common features with the more organizationally complex eukaryotes, especially in the organization of their information processing cellular machinery. Based on these and other similarities it was suggested that eukaryotes evolved from archaea\cite{Lake1984,Lake1988}, a view strengthened in the phylogenomic era\cite{Cox2008}, and eventually solidified with the discovery of archaeal lineages such as the Lokiarchaeota\cite{Spang2015}. Thus, we now know that many of the complex cellular features that characterize eukaryotes trace their origins to their archaeal ancestry\cite{Koonin2010,Koonin2014}.

One of the most notable such features is nucleosomal chromatin. Nearly all eukaryotic genomes are packaged by nucleosomes, consisting of two tetramers of the four core histones H2A, H2B, H3 and H4, wrapping around $\sim$147 bp of DNA. These proteins are, with very rare exceptions\cite{Marinov2015dino,Marinov2016}, the most evolutionarily conserved among eukaryotes\cite{Postberg2010}, in large part because aside from their packaging function they are also subject to a large number of precisely regulated posttranslational modifications (PTMs) at key residues\cite{Jenuwein2001}, through which they play a pivotal role in all aspects of chromatin biology (transcription and its regulation, DNA replication, DNA repairs, mitosis, and others).

As early as the 1980s it was noticed that some archaea possess proteins and structures similar to eukaryotic histones and nucleosomes\cite{Searcy1980a,Searcy1980b}. We now know that most archaea have nucleosomal chromatin and histones\cite{Sandman2006,Henneman2018,Stevens2020}, and that these histones are ancestral to the eukaryotic histones. This is not true for all archaea as some lineages use other proteins such as Alba/Sac10b, Sul7d, Cren7, and CC1\cite{Zhang2012,Sandman2005} to package their genomes, but the most often observed state is to have histones.

However, archaeal histones differ substantially from those in eukaryotes -- while they share the core histone fold domain, they usually do not have the unstructured tails of H2A/H2B/H3/H4 that are the main sites of key PTMs. Archaeal histones also do not form octameric nucleosomes; instead, only one or a very small number of histone genes are found in archaeal genomes, and the structures they form are very different from those of eukaryotes. The diversity of histone sequences across the whole archaeal phylogeny is very large and still largely unexplored experimentally, but the available structural\cite{Mattiroli2017}, biochemical and modeling work suggests that in at least some species histones can form so called ``hypernucleosomes'' or ``archaeasomes'', consisting of a protein core of individual histones stacked next to each other, around which DNA is wrapped\cite{Henneman2018,Henneman2021}, ranging from 60 to 500 bp\cite{Bowerman2020}. It has also been proposed that archaeal histones exhibit an inherently dynamic association with DNA, resulting in so called chromatin 'slinkies' that can easily slide along DNA\cite{Bowerman2020}, in contrast to the much more stable association of nucleosomes with DNA in eukaryotes.

Despite the relevance of archaeal chromatin to understanding the deep evolution of chromatin organization, up until now the structure of archaeal chromatin has received direct experimental investigation using modern genomic tools, with the exception of early MNase-seq studies nearly a decade ago that mapped nucleosomal positioning in the euryarchaeotes \textit{Haloferax volcanii}\cite{Ammar2012} and \textit{Methanothermobacter thermautotrophicus} and \textit{Thermococcus kodakarensis}\cite{Nalabothula2013}. Furthermore, very little is known about the relationship between chromatin structure and the regulation of gene expression in these organisms.

To fill these gaps in our understanding of the organization of archaeal chromatin, we mapped chromatin accessibility and active transcription in \textit{Haloferax volcanii} using a combination of bulk and single-molecule techniques such as ATAC-seq\cite{ATAC} (Assay for Transposase-Accessible Chromatin using sequencing), NOMe-seq/dSMF\cite{Krebs2017} (Nucleosome Occupancy and Methylome sequencing/dual Single-Molecule footprinting) and KAS-seq\cite{Wu2020} (Kethoxal-assisted single-stranded DNA sequencing). We find that chromatin in \textit{H. volcanii} exhibits similar features to that of eukaryotes on a broad level, with preferentially accessible promoters regions. However, unlike in eukaryotes, chromatin accessibility at promoters does not relate to transcriptional activity. Using single-molecule footprinting we estimate absolute protein occupancy levels over the \textit{H. volcanii} to be comparable to or possibly slightly lower than in eukaryotes. However, unlike most eukaryotes, we do not observe stably positioned nucleosome protection footprints, but rather only statistically elevated accessibility around promoters. We also examine the coordination of transcriptional activity and chromatin accessibility within \textit{Haloferax} operons, and make the unexpected discovery that some CRISPR arrays are associated with very strong ssDNA signatures.

\section*{Results}

\subsection*{ATAC-seq reveals the open chromatin landscape of \textit{H. volcanii}}

\begin{figure*}
\begin{center}
\includegraphics[width=18.25cm]{Fig2-NOMe-V2.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Absolute DNA occupancy/protection levels in \textit{H. volcanii}}. 
(A-C) TSS metaprofiles in different conditions (two replicates of an exponentially dividing culture, and a stationary culture).
(D) Single-molecule map (250-bp) around a main-chromosome TSS. Black indicates unmethylated and therefore protected sites, gray indicates methylated and thus accessible sites.
(E and F) Single-molecule maps over the pHV2 plasmid: 250 bp-window map (E) and very high coverage ($\geq$1,200 single molecules) 200 bp-window map (F).
}
\label{Fig2}
\end{figure*}

To study chromatin accessibility in archaea we adapted the ATAC-seq assay to the \textit{Haloferax volcanii} archaeon. \textit{H. volcanii} is a halophile with a strong preference for very high salt concentrations in the growth medium (see the Methods section), which grows optimally at 42$\,^{\circ}\mathrm{C}$\cite{Mullakhanbhai1975,Pohlschroder2019}, and is a widely used archaeal model system.

After extensive testing of a variety of different experimental protocols (fixation conditions and input cell numbers), we arrived at the following modifications of the standard ATAC protocol. First, because archaea are not eukaryotes and do not have a nucleus, we omitted the cell lysis and nuclei isolation step that is a standard feature of eukaryotic ATAC-seq protocols, such as the now standard omniATAC\cite{Corces2017}. Second, and most important, we reasoned that if the previously reported dynamic repositioning of archaeal nucleosomes along DNA occurs in \textit{H. volcanii}, optimal results might be obtained by introducing a crosslinking step into the standard ATAC protocol, which would ``freeze’’ nucleosomes in place and not allow transposition into DNA that might change from protected to accessible during the duration of the transposition reaction. Indeed, comparing the TSS (transcription start site) enrichment generated without fixation and with light (0.1\% formaldehyde) and strong (1\% formaldehyde) fixation showed that strong fixation produces optimal (by TSS enrichment) results (Figure \ref{Fig1}A). The optimal input cell number was determined to be $\sim$1 $\times 10^6$. We then compared the \textit{H. volcanii} ATAC-seq TSS metaprofile with that from the previously published MNase-seq dataset and observed the expected inverse relationship (Figure \ref{Fig1}B). \textit{H. volcanii} TSSs exhibit elevated accessibility in the 0 to -400 bp upstream region, decreasing away from the TSS, and displaying a weak periodicity of $\sim$60-70 bp.

Genome browser visualization of ATAC-seq profiles (Figure \ref{Fig1}C-D) revealed an accessibility landscape largely reminiscent of that in eukaryotes with compact genomes such as the budding yeast \textit{Saccharomyces cerevisiae}\cite{Shipony2020}, with accessibility concentrated at TSSs. Peak calling using MACS2\cite{MACS2} generalized that observation (Figure \ref{Fig1}E) -- nearly all ATAC-seq peaks are located within 200 bp of an annotated TSS. 

ATAC-seq measurements in \textit{H. volcanii} are also highly reproducible between experimental replicates (Figure \ref{Fig1}F). 

The \textit{H. volcanii} ATAC-seq fragment length distribution is unimodal, peaking at 90-100 bp, and does not show the eukaryotic mono-, di- and tri-nucleosomal signature  (Figure \ref{Fig1}G).

The \textit{H. volcanii} genome consists of multiple replicons\cite{Hartman2010}, with a main chromosome (``chr'') and four plasmids of very different size -- pHV4, pHV3, pHV1 and pHV2 (in order of decreasing size), which together comprise $\sim$30\% of the total genome. To properly interpret sequencing data (which is typically normalized to total read coverage), we determined the relative copy number distribution of these replicons using tagmented naked genomic DNA control samples generated during the optimization of the ATAC-seq protocol (Figure \ref{Fig1}G). The smallest plasmid -- pHV2 -- appears to exist in $\sim$26 copies for each main chromosome, pHV1 is found at $\sim$1.4 copies for each main chromosome, while the two large plasmids -- pHV4 and pHV3 -- exist in a 1:1 ratio to the main chromosome. 

As previous studies of chromatin openness in bacteria have reported the existence of very large domains of lower and higher accessibility\cite{Melfi2021}, we wondered whether the same is observed in archaea with nucleosomal chromatin. We do not observe such domains in our datasets (Figure \ref{Fig1}I). Recently, an ATAC-seq dataset was reported from the crenarchaeote \textit{Sulfolobus islandicus}\cite{Badel2022}, which lacks histones, and instead packages its genome mainly through Alba/Sac10b proteins\cite{Cajili2022,Bell2002}. In that species, large domains similar to those in bacteria were reported, suggesting that such large-scale domains might be a feature associated with the lack of nucleosomal chromatin in prokaryotes, while nucleosomal archaea such as \textit{H. volcanii} exhibit eukaryote-like organization. We also reexamined the \textit{Sulfolobus islandicus} dataset and found that it displays a much more modest TSS enrichment than that seen in \textit{H. volcanii}, which is also more narrowly concentrated around the TSS position (Supplementary Figure \ref{FigS1}).

\subsection*{Absolute DNA occupancy/protection levels in \textit{H. volcanii}}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig3-KAS.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf The ssDNA and active transcription landscape in the \textit{H. volcanii} genome as measured by KAS-seq}. 
(A) Global KAS-seq profiles over each of the five \textit{H. volcanii} chromosomes in an exponential culture.
(B) High reproducibility of active transcription measurements using KAS-seq.
(C-D) Representative browser snapshots of KAS-seq profiles along the \textit{H. volcanii} genome.
(E) KAS-seq metaprofile along \textit{H. volcanii} gene bodies.
}
\label{Fig3}
\end{figure*}

While ATAC-seq is immensely helpful for identifying the location of accessible regions in the genomes and measuring their relative accessibility, it is a bulk method that does not provide information about the absolute levels of protection/accessibility in the genome. Instead, absolute accessibility must be measured either restriction digestion-based or enzymatic labeling single-molecule methods. To quantify absolute occupancy/protection levels in the \textit{H. volcanii} genome, we applied NOMe-seq\cite{Kelly2012} and dSMF\cite{Krebs2017} to \textit{Haloferax} chromatin. These methods rely on the preferential methylation of accessible cytosine nucleotides (5mC) by a recombinant methyltransferase that modifies specifically in GpC contexts (NOMe-seq) or a combination of methyltransferases that label both GpC and CpG (dSMF).

However, these methods are potentially confounded by the presence of endogenous methylation in either context. Fortunately, in the case of \textit{H. volcanii} endogenous DNA modifications have been previously studied using PacBio single molecule sequencing, and no CpG and GpC modifications were found. Instead, only two restriction modification system-associated modifications in different contexts, specifically, 4-methylcytosine in a C(m4)TAG context and N6-methyladenine in a GCA(m6)BN6VTGC context were identified\cite{Ouellette2015}.

Figures \ref{Fig2}A-C show the metaprofiles of average methylation around \textit{H. volcanii} TSSs for NOMe-seq and dSMF datasets generated from exponentially growing and stationary cultures (post log-phase in the growth curve). We observe baseline absolute protection levels around 84-85\% in the exponentially growing cells and $\sim$89\% in stationary cells. For comparison, analogous studies in eukaryotes, such as the budding yeast \textit{S. cerevisiae}\cite{Oberbeckmann2019,Chereji2019,Shipony2020}, have shown absolute protection levels around 90\% ($\pm$ 5\%). Thus, archaeal nucleosomal chromatin exhibits broadly similar, though perhaps somewhat lower levels of protection than what is observed in core eukaryotes.

The base-pair resolved nature of these  single-molecule methods enabled an observation of a feature not readily apparent in ATAC-seq and MNase-seq datasets -- a protection footprint immediately upstream of the TSS. We also observe this feature as a protection footprint in a few percent of single molecules at individual promoters (Figure \ref{Fig2}D). At present we are not able to confidently identify its functional association -- its width is likely too small for it to be a positioned -1 nucleosome, and we hypothesize it may correspond to one of the complexes involved in the archaeal transcriptional cycle, analogous to how similar protection footprints associated with the RNA polymerase and the preinitiation complex (PIC) in eukaryotes have been observed in dSMF datasets\cite{Krebs2017}. 

On the other hand, unlike this unique protection footprint, we do not observe strongly positioned individual nucleosomes along the \textit{Haloferax} genome, such as those seen in core eukaryotes (Figure \ref{Fig2}D). However, our NOMe-seq and dSMF datasets were sequenced at an effective depth of $\sim$100$\times$ for fragments of width 200 bp. To determine if higher sequencing depth would clearly reveal these positioned nucleosomes, we turned to the pHV2 plasmid, which, as previously discussed, exists in high copy numbers in \textit{H. volcanii} cells. Over the pHV2 plasmid we obtained $\sim$200$\times$ coverage for fragments of length 250 bp (Figure \ref{Fig2}E) and $\sim$1,200$\times$ coverage for fragments of length 200 bp (Figure \ref{Fig2}F). These high-depth maps also reveal considerable heterogeneity of footprints and accessible sites, consistent with previous proposals for highly dynamic association of archaeal nucleosomes with DNA.

\subsection*{The ssDNA and active transcription landscape in the \textit{H. volcanii} genome}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig4-KAS-CRISPR.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Abundant ssDNA structures associated with some \textit{H. volcanii} CRISPR arrays in specific conditions}. 
(A) Global KAS-seq profiles over each of the five \textit{H. volcanii} chromosomes in a long-standing culture ($\sim$3 months) reveals an extremely strong ssDNA peak associated with one of the CRISPR arrays on the pHV4 plasmid.
(B) KAS-seq signal levels around pHV4 plasmid CRISPR arrays in different condition.
(C) KAS-seq and ATAC-seq levels around all three \textit{H. volcanii} CRISPR arrays in different conditions.
}
\label{Fig4}
\end{figure*}

We then turned our attention to the landscape of active transcription in \textit{H. volcanii}. To this end, we utilized the KAS-seq\cite{Wu2020} assay, which measures with high specificity the presence of single-stranded DNA in the genome. Most ssDNA is usually found within the transcriptional bubbles associated with RNA polymerase molecules engaged with DNA. KAS-seq provides several advantages in the \textit{H. volcanii} context. First, due to the absence of readily available means of depleting \textit{H. volcanii} ribosomal RNA (rRNA) from RNA sequencing libraries, it enables measurement of transcriptional activity at much lower cost than deep RNA-seq experiments. Second, it measures the act of active transcription, unlike the steady-state transcript levels that conventional RNA-seq quantifies. Third, it also identifies other ssDNA structures, such as those resulting from paused polymerase molecules, G-quadruplexes, and others.

We carried out a time course analysis of \textit{Haloferax} growth and applied both KAS-seq and ATAC-seq during the ``exponential'' log-phase of growth, and on the ``stationary'' post-log phase stage, as well as on ``standing'' cultures, which had been left at room temperature for $\sim$1 week. We also carried out KAS-seq on exponentially growing cells that were then incubated at different temperatures -- the typical growing temperature of 42$\,^{\circ}\mathrm{C}$, 37$\,^{\circ}\mathrm{C}$, 23$\,^{\circ}\mathrm{C}$ and a cold shock at 4$\,^{\circ}\mathrm{C}$ for 4 hours.

At the broadest global level, we observe uniform levels of KAS signal along the length of \textit{H. volcanii} chromosomes (Figure \ref{Fig3}A), with sharp localized peaks. Also, KAS-seq measurements in \textit{H. volcanii} are highly reproducible between experimental replicates (Figure \ref{Fig2}B). Locally, at the level of individual genes, we observe a combination of high peaks at the promoters of some genes and elevated KAS-seq signal along gene bodies (Figure \ref{Fig2}C-D), which observation is generalized by KAS-seq metaprofiles over all genes (Figure \ref{Fig2}E). This indicates that in \textit{H. volcanii} RNA polymerases spend substantial amount of time associated with the TSS, perhaps to the point of full pausing analogous to that observed in metazoans\cite{Core2019}.

\subsection*{Abundant ssDNA structures associated with some \textit{H. volcanii} CRISPR arrays in specific conditions}

We also carried out a KAS-seq on a \textit{H. volcanii} culture that had been left standing at room temperature for 3$\sim$ months, which resulted in a surprising observation about the chromatin structure of CRISPR arrays in this organism. CRISPR (clustered regularly interspaced short palindromic repeats) arrays are a key element in the defense systems against foreign genetic material of many prokaryotes and consist of multiple identical repeats interspersed with non-repetitive sequences that target foreign plasmids and phages, together with a set of Cas genes. \textit{H. volcanii} is one of the prokaryotic systems where these elements were first originally observed\cite{Mojica2016,Mojica2005,Mojica1995}. 

In standing \textit{Haloferax} cultures, transcriptional activity is largely suppressed, as the cells enter a dormant state. This is reflected in the largely flat global map for the long-term standing culture KAS-seq dataset (Figure \ref{Fig4}A). However, we also observed a single extremely sharp peak of KAS-seq signal on the pHV4 plasmid. Close examination revealed that this peak resides between the second (as numbered in the available genome annotation) CRISPR array in the \textit{H. volcanii} genome and its associated Cas6 gene (Figure \ref{Fig4}B). This KAS-seq signal peak is also found in all other conditions we assessed, but it stands out particularly in the long-term standing culture due to the absence of all other peaks that result from the active transcription of regular genes.

Curiously, only the second CRISPR array in \textit{H. volcanii} displays this strong ssDNA structure, while the other two do not, but all three arrays show elevated chromatin accessibility in ATAC-seq datasets, which is not focused on the beginning of the array but covers its whole length (Figure \ref{Fig4}C). The possible interpretations of these observations is treated in detail in the Discussion section.

\subsection*{Coordination between chromatin accessibility and transcriptional activity within \textit{H. volcanii} operons}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig5-operons.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Coordination between chromatin accessibility and transcriptional activity within \textit{H. volcanii} operons}. Black bar shows the operon boundaries.
(A) Ribosomal RNA operon. Note that the tracks shown here were generated by including multimapping reads (see Methods for details).
(B) A-type ATP synthase subunits A, A, B, C, D, E, F, I, K, and H.
(C) RNA polymerase II subunits.
(D) NADH dehydrogenase-like complex subunits A, B, CD, H, I, J1, J2, K, L, M, N.
}
\label{Fig5}
\end{figure*}

Being prokaryotes, archaea often have genes organized into operons\cite{Price2006}, with multiple genes transcribed as a single unit. However, transcription of these operons is still little studied using modern genomic tools. To address this gap, we used our KAS-seq and ATAC-seq data, which provides information about the chromatin accessibility and active transcription in different conditions, to investigate the extent of coordination between the transcriptional activity of different units in operons in the \textit{H. volcanii} genome. 

Perhaps the most conspicuous \textit{H. volcanii} operon is the one that includes its rRNA genes\cite{Garrett1991}. There are two copies of this operon in the genome. Figure \ref{Fig5}A shows KAS-seq and ATAC-seq profiles along rRNA genes in the different conditions we assayed. We observe a largely uniform KAS-seq profile in exponentially growing cells, and generally elevated chromatin accessibility (which might be associated with very active transcription); a more non-uniform pattern is seen in cold-shocked cells kept at 4$\,^{\circ}\mathrm{C}$, where lower KAS-seq levels are seen over the large subunit (LSU) rRNA relative to the small subunit (SSU), with various intermediate states in other conditions.

More interesting patterns are seen in operons comprised of regular protein coding genes. Figure \ref{Fig5}B shows a gene array consisting of A-type ATP synthase subunits, for which distinct KAS-seq peaks are seen at the beginning of the operon as well as in between genes in the middle of the operon. Furthermore, KAS-seq levels are not uniform over the gene bodies of all genes. 

We examined multiple other operons (Figure \ref{Fig5}C-D) and Supplementary Figure \ref{FigS2}, which reveal a very diverse picture of the extent of coordination between the transcriptional activity over individual genes within an operon -- internal operon peaks are observed for multiple operons, while there are also other operons where KAS-seq signal is more uniform. 

In some cases (e.g. Figure \ref{Fig5}C), these internal KAS-seq peaks are also associated with matched ATAC-seq peaks. Thus, one interpretation of these observations is that not all these operons are true operons (even though they consist of functionally related genes), but instead independent initiation and regulation of transcription from internal TSSs may be occurring. This interpretation is particularly supported in the cases where ATAC-seq peaks are seen at the beginning of genes, and where the baseline gene-body KAS-seq signal differs greatly between different sections of the operon. On the other hand, internal KAS-seq peaks might also arise from very strong and immediate coupling between transcription and translation, i.e. if the process of initiation of translation at internal positions in the operon somehow leads to the polymerase pausing at certain sites.

\subsection*{Chromatin accessibility does not correlate with transcriptional activity in \textit{H. volcanii}}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig6-KAS-ATAC-DE-V3.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Chromatin accessibility does not correlate with transcriptional activity in \textit{H. volcanii}}. 
(A) Differential chromatin accessibility between exponential and stationary conditions;
(B) Differential KAS-seq levels between exponential and stationary conditions;
(C-D) Lack of correlation between KAS and ATAC signals in exponential and stationary conditions;
(E) Lack of correlation between changes in chromatin accessibility and changes in transcriptional activity.
}
\label{Fig6}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig7-KAS-ATAC-snapshots-heatmaps-KAS-sorted.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Chromatin accessibility does not correlate with transcriptional activity in \textit{H. volcanii}}. 
(A) Genome-wide heatmaps of ATAC-seq and KAS-seq signals around \textit{H. volcanii} TSSs, sorted by KAS-seq levels in the exponential condition.
(B-C) Representative snapshots of genes with significantly altered transcriptional activity between the exponential and standing condition, but no corresponding changes in chromatin accessibility.
}
\label{Fig7}
\end{figure*}

In eukaryotes, the regulation of chromatin accessibility at regulatory elements (promoters and enhancers) is key to gene regulation, as nucleosomal chromatin is generally refractive to occupancy by regulatory proteins and to active transcription\cite{Klemm2019}, and while perfect correlation between accessibility levels at promoters and gene expression is rare, open chromatin states are generally associated with increased transcriptional activity. 

In contrast, the relationship between chromatin accessibility and transcriptional activity in archaea has not been systematically studied as chromatin accessibility has not been mapped globally, across conditions, and in conjunction with global measurements of active transcription. 

We first identified differentially accessible promoter regions between the different conditions we studied (Figure \ref{Fig6}A). Strikingly, we did not find strong changes between exponentially growing and stationary cells (Figure \ref{Fig6}A). We observed large apparent differences in promoter accessibility magnitude between each of those two conditions and standing cultures (Supplementary Figure \ref{FigS3}), but in those comparisons the profiles are highly skewed towards increased higher accessibility in the actively growing and stationary cells instead of showing the typical more symmetric changes between two conditions; the explanation for this pattern is that it is due to the dormant state in which standing cultures are in, and in which we do not observe as strong ATAC-seq peaks as in actively growing cells.

In stark contrast to the lack accessibility changes between non-dormant conditions, we find a large number of genes that display strong differential KAS-seq signal over their gene bodies (Figure \ref{Fig6}B). The lack of major changes in chromatin accessibility is therefore not due to little changing in the overall cellular state between these conditions, as large-scale changes in RNA polymerase occupancy over the genome are in fact observed. 

We then quantified the degree of correlation between KAS and ATAC signals (Figure \ref{Fig6}C-D) and found no correlation between the two. We also found no correlation between the level of changes in chromatin accessibility and changes in RNA polymerase association with DNA between exponential and stationary cells (Figure \ref{Fig6}E). 

These global observations are supported by the study of individual loci. Figure \ref{Fig7}A shows ATAC-seq levels and KAS-seq levels over all genes across different conditions; the lack of correlation between them is readily apparent. Figure \ref{Fig7}B and C show individual examples of genes for which transcriptional activity shifts between conditions yet ATAC-seq profiles are largely identical. 

We thus conclude that based on the currently available data, the modulation of chromatin accessibility does not appear to be a major determinant/correlate of transcriptional activity in \textit{Haloferax} archaea.

\section*{Discussion}

In this study, we adapted and applied methods for global profiling of chromatin accessibility and ssDNA in the euryarchaeote \textit{Haloferax volcanii}, revealing the chromatin architecture of archaeal nucleosomal chromatin. We identified several convergent and divergent with respect to those of core eukaryotes properties. 

The \textit{H. volcanii} genome displays a chromatin organization very similar to that of eukaryotes with compact genomes such as budding yeast -- accessibility peaks are almost exclusively found very close to promoters. Absolute accessibility/protection levels are similar, perhaps slightly lower than those in budding yeast, with a baseline protection level of 85-90\%.

On the other hand, we do not observe the strongly positioned nucleosomes typical to eukaryotes in \textit{Haloferax}, but a more heterogeneous picture consistent with the previously reported dynamic association of archaeasomes with DNA.

Unlike what has been reported about bacteria and archaea without nucleosomes, the \textit{H. volcanii}  genome does not exhibit large-scale domains of diminished and elevated accessibility, which property can now tentatively be associated with the absence of nucleosomes in prokaryotes.

In contrast to the norm in eukaryotes, accessibility at promoters does not correlate with transcriptional activity. This is a puzzling observation that will need to be functionally dissected in depth by future studies. The idea that chromatin accessibility is not necessary for transcription in archaea is supported by previous observations that transcription by the archaeal RNA Polymerase is slowed, but not blocked by archaeal nucleosomes\cite{Xie2004}. However, we are still left with the question of what the determinants of the observed accessibility are. Nearly all promoters in \textit{H. volcanii} show some level of accessibility (Figure \ref{Fig7}A), but its levels differ greatly between individual genes. How these differential states are specified, and whether they might in fact change in conditions that we have not assayed remains to be determined. MNase-seq studies in \textit{Methanothermobacter thermautotrophicus} and \textit{Thermococcus kodakarensis}\cite{Nalabothula2013} resulted in the proposal that nucleosome positioning in those organisms is significantly influenced by DNA sequence, but no such strong association was reported for \textit{Haloferax volcanii}\cite{Ammar2012}, and it is not certain that very weak  sequence-based nucleosome positioning can account for the large differences in promoter accessibility combined with a general absence of such strong peaks elsewhere in the genome observed in \textit{Haloferax}. Generating chromatin accessibility and nucleosome positioning data across other archaeal clades and also in multiple closely related species will allow us to generalize these observations and to train fully powered models that relate sequence to chromatin accessibility, potentially identifying such determinants. 

We also made the surprising observation that operons in \textit{Haloferax} display non-uniform levels of transcriptional activity and may in fact consist of multiple distinct transcriptional units. Again, this is an observation and a hypothesis that needs to be generalized to and tested in not only more archaeal species, but also in bacteria, where the application of KAS-seq to the study of transcriptional activity may also result in unanticipated findings. In \textit{Haloferax} we were only able to examine several dozen of unambiguous operons (e.g. unidirectional arrays of functionally related genes).

Bacteria are also highly relevant to the other surprising observation we made -- the strong ssDNA structure present at the second \textit{Haloferax} CRISPR array, especially in dormant cells that are otherwise mostly transcriptionally silent, but not at the other two CRISPR arrays. The second CRISPR array is uniquely associated with the Cas6 gene. Cas6 is the endoribonuclease that generates guide RNAs\cite{Carte2008}, thus one possible explanation for the strong ssDNA peak between the CRISPR array and Cas6 is that it represents paused RNA polymerase at the Cas6 promoter. If this explanation is correct, the fact that Cas6 is the only gene in the genome with this property in dormant cells is remarkable and perhaps points to the importance of retaining the ability to process CRISPR transcripts even in a dormant cellular state. Alternatively, the ssDNA structure might be related to the transcription of the CRISPR array itself; however, such an explanation does not explain the uniqueness of the KAS-seq signal at the second CRISPR array and the absence of the same strong KAS peaks at the other two CRISPR arrays. The functional significance of elevated chromatin accessibility over CRISPR arrays is also currently unknown. As prokaryotes exhibit an immense variety of CRISPR systems and number and organization of CRISPR arrays, mapping these properties in multiple other additional prokaryotes should be highly informative.

The foundation provided by our work for the future application of functional genomic methods across different archaeal species and conditions should answer these and many other questions.

\section*{Methods}

Except where explicitly indicated otherwise, data was processed using custom-written Python scripts (\burl{https://github.com/georgimarinov/GeorgiScripts})

\subsection*{\textit{Haloferax volcanii} cell culture}

\textit{H. volcanii} cells were obtained from the DSMZ German Collection of Microorganisms and Cell Cultures GmbH (Cat \# 3757), and cultured in \textit{Halobacterium} media\cite{Torreblanca1986,Rodrigue1983}, prepared as follows: 7.50 g casamino acids, 10.00 g yeast extract, 3.00 g sodium citrate, 2.00 g KCl, 20.00 g MgSO$_4 \times$ 7 H$_2$O, 0.05 g FeSO$_4 \times$ 7 H$_2$O, 0.20 mg MnSO$_4 \times$ H$_2$O, and 250.00 g NaCl were mixed with distilled water in a total volume of 1 L. Media was then autoclaved and allowed to cool.

\textit{H. volcanii} was typically grown at 42$\,^{\circ}\mathrm{C}$, except for where otherwise indicated. 

Cultures were stored at room temperature when not actively growing.

\subsection*{\textit{Haloferax volcanii} genome assembly and annotations}

For all analyses, the genome assembly and annotation for the \textit{Haloferax volcanii} DS2 strain, downloaded from the NCBI database, and also matching the \verb|haloVolc1| version on the UCSC Microbial Genome Browser\cite{UCSC2012} (\burl{http://microbes.ucsc.edu/}), was used. The UCSC Microbial Genome Browser was used for visualization of genome browser tracks.

\subsection*{ATAC-seq experiments}

Several variations of the ATAC-seq assays were tested. As \textit{H. volcanii} is an archaeon, i.e. a prokaryote without a nucleus, and as it does not have a cell wall, the nuclei isolation step typical for ATAC-seq protocols used in eukaryotes was omitted. 

For native ATAC-seq, cells ($\sim$0.1, $\sim$1 or $\sim$10 $\times$ 10$^6$ cells as measured by OD$_{600}$) were pelleted at 10,000 $g$ for 2 minutes, then resuspended in 50 $\mu$L transposition mix (25 $\mu$L 2$\times$ TD buffer, 2.5 $\mu$L Tn5, 22.5 $\mu$L ultrapure H$_2$O), and incubated at 37$\,^{\circ}\mathrm{C}$ for 15 minutes. The reaction was stopped by adding 250 $\mu$L PB Buffer and purified using the MinElute PCR Purification Kit (Qiagen, Cat \# 28006), eluting in 10 $\mu$L EB buffer. PCR was carried out by mixing the 10 $\mu$L eluate, 10 $\mu$L H$_2$O, 2.5 $\mu$L i5 primer, 2.5 $\mu$L i7 primer, and 25 $\mu$L NEBNext High-Fidelity 2$\times$ PCR Master Mix, using the following thermocycler program: 3 minutes at $72\,^{\circ}\mathrm{C}$, 30 seconds at $98\,^{\circ}\mathrm{C}$, 10 cycles of: $98\,^{\circ}\mathrm{C}$ for 10 seconds, $63\,^{\circ}\mathrm{C}$ for 30 seconds, $72\,^{\circ}\mathrm{C}$ for 30 seconds. Final libraries were purified using the MinElute PCR Purification Kit. 

For crosslinked ATAC-seq, cells were fixed by adding 37\% formaldehyde (Sigma) at a final concentration of either 0.1\% or 1\% and incubating for 15 minutes at room temperature. Formaldehyde was then quenched using 2.5 M glycine at a final concentration of 0.25 M. Cells were subsequently centrifuged at 10,000 $g$ for 2 minutes, washed once in 1$\times$ PBS, and centrifuged again at 10,000 $g$ for 2 minutes. Transposition was carried out as above for 15 minutes. The reaction was stopped with the addition of 150 $\mu$L IP Elution Buffer (1\% SDS, 0.1 M NaHCO$_3$) and 2 $\mu$L Proteinase K (Promega, Cat \# MC5005), then incubated at 65$\,^{\circ}\mathrm{C}$ overnight to reverse crosslinks. DNA was isolated by adding an equal volume of 25:24:1 phenol:chloroform:isoamyl solution, vortexing and centrifuging for 3 minutes at 14,000 rpm, then purifying the top aqueous phase using the MinElute PCR Purification Kit, eluting in 10 $\mu$L EB buffer. Libraries were generated as described above.

\subsection*{ATAC-seq data processing}

Demultipexed FASTQ files were mapped to the \textit{H. volcanii} genome as 2$\times$36mers using Bowtie\cite{Bowtie2009} (version 1.0.1) with the following settings: \verb|-v 2| \verb|-k 2| \verb|-m 1| \verb|--best| \verb|--strata|. Duplicate reads were removed using \verb|picard|\verb|-tools| (version 1.99). 

TSS scores were calculated as the ratio of ATAC signal in the region $\pm$100 bp around TSSs versus the ATAC signal of the 100-bp regions centered at the two points $\pm$2 kbp of the TSS as previously described\cite{MarinovShipony2021}.

Peak calling was carried out using MACS2\cite{MACS2} with the following settings: \verb|-g| \verb|4000000| \verb|-f| \verb|BAM| \verb|--to-large| \verb|--keep-dup all| \verb|--nomodel|.

\subsection*{DNA isolation and naked DNA sequencing}

Genomic DNA was isolated by centrifuging cells at 10,000 $g$ and resuspending the pellet in 200 $\mu$L 1$\times$ PBS, then using the MagAttract HMW DNA Kit (Qiagen, Cat \# 67563), following the manufacturer's instructions. 

Genomic DNA libraries were prepared using 5 ng of DNA in a 50-$\mu$L transposition reaction ($x$ $\mu$L DNA, 22.5 - $x$ $\mu$L H$_2$O, 25 $\mu$L 2$\times$ TD buffer, 2.5 $\mu$L Tn5). The reaction was carried out for 5 minutes at 55$\,^{\circ}\mathrm{C}$, then stopped with 250 $\mu$L PB buffer. DNA was isolated using the MinElute PCR Purification Kit and amplified as described above for ATAC-seq.

\subsection*{NOMe-seq and dSMF experiments}

NOMe-seq/dSMF experiments were carried out as previously described\cite{Shipony2020}, with some modifications. Cells were pelleted at 10,000 $g$, then crosslinked as described for ATAC-seq at 1\% formaldehyde concentration. 

Fixed cells were resuspended in 100 $\mu$L M.CviPI Reaction Buffer (50 mM Tris-HCl pH 8.5, 50 mM NaCl, 10 mM DTT), then treated with M.CviPI by adding 200 U of M.CviPI (NEB), SAM at 0.6 mM and sucrose at 300 mM, and incubating at $30\,^{\circ}\mathrm{C}$ for 20 minutes. After this incubation, 128 pmol SAM and another 100 U of enzyme were added, and a further incubation at $30\,^{\circ}\mathrm{C}$ for 20 minutes was carried out. For dSMF experiments, M.SssI treatment followed immediately, by adding 60 U of M.SssI (NEB), 128 pmol SAM, MgCl$_2$ at 10 mM and incubation at $30\,^{\circ}\mathrm{C}$ for 20 minutes. The reaction was stopped by adding an equal volume of Stop Buffer (20 mM Tris-HCl pH 8.5, 600 mM NaCl, 1\% SDS, 10 mM EDTA). 

Crosslinks were reversed overnight at $65\,^{\circ}\mathrm{C}$, and DNA was isolated using the MinElute PCR Purification Kit (Qiagen, Cat \# 28006).

Enzymatically labeled DNA was then sheared on a Covaris E220, and converted into sequencing libraries following the EM-seq protocol, using the NEBNext Enzymatic Methyl-seq Kit (NEB, Cat \# E7120L). 

\subsection*{NOMe-seq data processing}

Adapters were trimmed from reads using Trimmomatic\cite{trimmomatic} (version 0.36). Trimmed reads were aligned against the \textit{H. volcanii} genome using \verb|bwa-meth| with default settings. Duplicate reads were removed using \verb|picard|\verb|-tools| (version 1.99). Methylation calls were extracted using \verb|MethylDackel| (\burl{https://github.com/dpryan79/MethylDackel}). Additional analyses were carried out using custom-written Python scripts (\burl{https://github.com/georgimarinov/GeorgiScripts}). 

\subsection*{KAS-seq experiments}

KAS-seq experiments were carried out following the previously published protocol\cite{Wu2020} with some modifications in the sequencing library generation part. 

Briefly, a 500-mM N$_3$-kethoxal solution was brought to 37$\,^{\circ}\mathrm{C}$, then added to 2 mL of culture at a final concentration of 5 $\mu$M. Cells were then incubated for 5 minutes at 37$\,^{\circ}\mathrm{C}$ in a ThermoMixer at 1000 rpm. 

Cells were then pelleted by centrifugation at 10,000 $g$ for 1 minute, resuspended in 200 $\mu$L 1$\times$ PBS buffer, and DNA was immediately isolated using the Monarch Genomic DNA Purification Kit (NEB, Cat \# T3010S), with the modification that elution was carried out with 50 $\mu$L 25 mM K$_3$BO$_3$ solution (pH 7.0).

Biotin was clicked onto kethoxal-modified guanines by mixing 50 $\mu$L DNA, 2.5 $\mu$L 20 mM DBCO-PEG4-biotin (Sigma, Cat \# 760749; DMSO solution), 10 $\mu$L 10$\times$ PBS, and 22.5 $\mu$L 25 mM K$_3$BO$_3$ and incubating at 37$\,^{\circ}\mathrm{C}$ for 90 minutes.

DNA was isolated using AMPure XP beads and eluted in 130 $\mu$L 25 mM K$_3$BO$_3$ (pH 7.0), then sheared on a Covaris E220 for 120 seconds down to $\sim$150-200 bp.

Libraries were built on beads using the NEBNext Ultra II DNA Library Prep kit (NEB, Cat \# E7645L). Biotin pull down was initiated py pipetting 20 $\mu$L Dynabeads MyOne Streptavidin T1 beads (ThermoFisher Scientific, Cat \# 65306) into DNA lo-bind tubes. Beads were separated on magnet, resuspended in 200 $\mu$L of 1$\times$ TWB buffer (Tween Washing Buffer; 5 mM Tris-HCl pH 7.5; 0.5 mM EDTA; 1 M NaCl; 0.05\% Tween 20), then separated on magnet again and resuspended in 300 $\mu$L of 2$\times$ BB (Binding Buffer; 10 mM Tris-HCl pH 7.5, 1 mM EDTA; 2 M NaCl). The DNA (130 $\mu$L) was added together with 170 $\mu$L 0.1$\times$ TE buffer, and incubated at RT on rotator for $\geq$15 minutes. Beads were separated on magnet, resuspended in 200 $\mu$L of 1$\times$ TWB, and incubated at 55$\,^{\circ}\mathrm{C}$ in a Termomixer for 2 minutes with shaking at 1000 rpm. Beads were again separated on magnet and the 200-$\mu$L 55$\,^{\circ}\mathrm{C}$ TWB wash step was repeated. Beads were separated on magnet and resuspended in 50 $\mu$L 0.1$\times$ TE. 

End repair was carried out by adding 7 $\mu$L NEB End Repair Buffer and 3 $\mu$L NEB End Repair Enzyme, incubating at 20$\,^{\circ}\mathrm{C}$ for 30 minutes, then at 65$\,^{\circ}\mathrm{C}$ for 30 minutes. 

End repair was followed by adaptor ligation by adding 2.5 $\mu$L NEB Adaptor, 1 $\mu$L NEB Ligation Enhancer and 30 $\mu$L NEB Ligation Mix, incubating at 20$\,^{\circ}\mathrm{C}$ for 20 minutes, then adding 3 $\mu$L USER Enzyme and incubating at 37$\,^{\circ}\mathrm{C}$ for 15 minutes. Beads were separated on magnet, resuspended in 200 $\mu$L of 1$\times$ TWB, then incubated at 55$\,^{\circ}\mathrm{C}$ in a Thermomixer for 2 minutes with shaking at 1000 rpm. Subsequently beads were separated on magnet and resuspended in 100 $\mu$L of 0.1$\times$ TE, separated on magnet again, resuspended in 15 $\mu$L of 0.1$\times$ TE Buffer, and transfered to PCR tubes.

Beads were then incubated at $98\,^{\circ}\mathrm{C}$ for 10 minutes, and libraries were amplified by adding 5 $\mu$L of i5 primer, 5 $\mu$L of i7 primer and 25 $\mu$L of 2$\times$ Q5 Hot Start Polymerase Mix, using the following PCR program: 30 seconds at $98\,^{\circ}\mathrm{C}$; 15 cycles of $98\,^{\circ}\mathrm{C}$ for 10 seconds, $65\,^{\circ}\mathrm{C}$ for 30 seconds, and $72\,^{\circ}\mathrm{C}$ for 30 seconds; and a final extension at $72\,^{\circ}\mathrm{C}$  for 5 minutes.

Beads were separated on magnet and the final libraries were purified from the supernatant using 50 $\mu$L AMPure XP beads, eluting in 0.1$\times$ TE buffer. 

\subsection*{KAS-seq data processing}

Demultipexed FASTQ files were mapped to the \textit{H. volcanii} genome as 2$\times$36mers using Bowtie\cite{Bowtie2009} with the following settings: \verb|-v 2| \verb|-k 2| \verb|-m 1| \verb|--best| \verb|--strata|. Duplicate reads were removed using \verb|picard|\verb|-tools| (version 1.99). 

\subsection*{Multimapping reads analysis}

For the purpose of examining repetitive regions in the genome, such as the rRNA operons, which exist in two identical copies in the genome, and are thus not uniquely mappable, reads were mapped with the \verb|-a| option instead of \verb|-k 2| \verb|-m 1|. Normalization was carried out as previously described and discussed\cite{Marinov2015piwi}.

\subsection*{Differential accessibility/KAS-seq analysis}

The analysis of differential chromatin accessibility as measured using ATAC-seq or enriched for KAS-seq signal was carried out using DESeq2\cite{Love2014}. Read counts were calculated over promoters or gene bodies and used as input into DESeq2.

\subsection*{External sequencing datasets}

MNAse-seq datasets for \textit{H. volcanii} were downloaded from NCBI accession PRJNA174818\cite{Ammar2012}, and processed as described above for ATAC-seq and KAS-seq. 

ATAC-seq for \textit{Suflolobus islandicus}\cite{Badel2022} was downloaded through the Short Read Archive (SRA) from BioProject accession 814106.

\subsection*{Data availability}

The sequencing datasets generated for and used in this study can be accessed from GEO accession GSE207470.


\section*{Author contributions}

G.K.M. conceptualized the study, carried out cell culture, performed ATAC-seq, KAS-seq, and NOMe-seq/dSMF, analyzed the data and wrote the manuscript, with input from W.J.G. S.T.B. performed ATAC-seq, KAS-seq, and NOMe-seq/dSMF experiments. T.W. and C.H. provided key reagents. A.K. and W.J.G. supervised the study.

\section*{Acknowledgments}

The authors would like to thank Samuel Kim for input on the KAS-seq protocol, Matthew P. Swaffer for technical assistance, and Zohar Shipony and members of the Greenleaf and Kundaje labs for helpful discussions. This work was supported by NIH grants (P50HG007735, RO1 HG008140, U19AI057266 and UM1HG009442 to W.J.G., 1UM1HG009436 to W.J.G. and A.K., 1DP2OD022870-01 and 1U01HG009431 to A.K.), the Rita Allen Foundation (to W.J.G.), the Baxter Foundation Faculty Scholar Grant, and the Human Frontiers Science Program grant RGY006S (to W.J.G). W.J.G is a Chan Zuckerberg Biohub investigator and acknowledges grants 2017-174468 and 2018-182817 from the Chan Zuckerberg Initiative. 

\section*{Conflicts of interest}

W.J.G. is a consultant and equity holder for 10x Genomics, Guardant Health, Quantapore, and Ultima Genomics, and cofounder of Protillion Biosciences, and is named on patents describing ATAC-seq. 

A.K. is a consulting Fellow with Illumina, a member of the SAB of OpenTargets (GSK), PatchBio, SerImmune and a scientific co-founder of RavelBio.

\begin{thebibliography}{100}

% \section*{References}

\input{references}

\end{thebibliography}

\end{multicols}

\clearpage

\setcounter{table}{0}
\renewcommand{\tablename}{Supplementary Table}
\setcounter{figure}{0}
\renewcommand{\figurename}{Supplementary Figure}

\setcounter{page}{1}
\renewcommand\thepage{{SM }\arabic{page}}

\begin{center}
% {\LARGE \textbf{\begin{spacing}{1.1}XXXX. \\ Supplementary Materials\end{spacing} }}
{\LARGE \textbf{Supplementary Materials}}
\end{center}

% \section*{Supplementary Tables}

\section*{Supplementary Figures}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{FigS1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf TSS enrichment levels in ATAC-seq data for \textit{Suflolobus islandicus}}. 
(A) TSS metaprofiles for ATAC-seq in fixed and unfixed cells and for genomic DNA libraries.
(B) TSS scores for each dataset.
}
\label{FigS1}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{FigS2-operons.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Coordination between chromatin accessibility and transcriptional activity within \textit{H. volcanii} operons}. Black bar shows the operon boundaries.
(A) Putative phosphate-phosphonate ABC transporter.
(B) Flagellar cluster (FlaCE, FlaF, FlaG, FlaH, FlaI, FlaJ).
(C) Urease accessory protein operon (UreG, UreD, UreE, UreF).
(D) 50S ribosomal proteins L12, L10, L1, L11.
(E) Putative ABC transporter.
(F) FeS assembly genes SufC, SufB, SufD.
(G) RNA Polymerase subunits.
(H) Dihydroxyacetone kinase subunit L, subunit DhaK, and phosphotransfer subunit.
}
\label{FigS2}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=12cm]{FigS3-KAS-ATAC-DE.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Differential ATAC-seq in KAS-seq analysis for standing \textit{H. volcanii} cells}. 
(A-B) Differential chromatin accessibility;
(C-D) Differential KAS-seq levels.
}
\label{FigS3}
\end{figure*}


\end{document}