\documentclass[10pt]{article}
\usepackage[hmargin=1.5cm,top=2cm,bottom=2cm]{geometry}
\usepackage{multicol}
\setlength\columnsep{15pt}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{array}
\usepackage{booktabs}
\usepackage{tabularx}
\usepackage[auth-sc]{authblk}
\usepackage{longtable}
\usepackage{multirow}
\usepackage{hyperref}
\usepackage{enumerate}
\usepackage[labelfont=bf]{caption}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage{mdframed}
\usepackage{graphics}
\usepackage{multirow}
\usepackage{rotating}
\usepackage{array}
\usepackage{lscape}
\usepackage{caption}
\usepackage{breakurl}
\usepackage{todonotes}
\usepackage{hanging}
\usepackage[final]{pdfpages}
\usepackage[leftFloats,CaptionAfterwards]{fltpage}
\usepackage[numbers,super,sort&compress]{natbib}
\setlength{\bibsep}{0pt plus 0.3ex}
\usepackage{abstract}
\usepackage{enumitem}
\usepackage{fancyhdr}
\usepackage{tikz}
\usepackage{soul}
\usepackage{titlesec}
\usepackage{fancyhdr}
\usepackage{tikz}

\titleformat{\section}[block]{\large\bfseries\filcenter}{\thesection.}{0.4em}{}
\titleformat{\subsection}[block]{\normalsize\sc\bfseries\filcenter}{\thesubsection.}{0.4em}{}
\titleformat{\subsubsection}[block]{\normalsize\sc\itshape\filright}{\thesubsection.}{0.4em}{}
\setcounter{secnumdepth}{5}

\definecolor{linen}{rgb}{0.98, 0.94, 0.9}

\usepackage{fancyhdr}
\pagestyle{fancy}
\newlength{\oddmarginwidth}
\setlength{\oddmarginwidth}{1in+\hoffset+\oddsidemargin}
\newlength{\evenmarginwidth}
\setlength{\evenmarginwidth}{\evensidemargin+1in}
\fancyfootoffset[LO,RE]{\oddmarginwidth}
\fancyfootoffset[LE,RO]{\evenmarginwidth}
\fancyhead{}
\renewcommand{\headrulewidth}{0pt}
\fancyfoot{} 
\lfoot[C]{\tikz{\node[black,outer sep=-20pt,inner sep=5pt,fill=linen,text width=\dimexpr\paperwidth\relax,align=center] at (0,0) {\thepage};}}
% \lhead[LO,RE]{\tikz{\node[black,outer sep=0pt,inner sep=5pt,fill=white,text width=\dimexpr\textwidth-1.5cm\relax,align=left] at (0,0) {\textbf{Marinov, Chen et al.}};}}
% \lhead[LE,RO]{\tikz{\node[black,outer sep=0pt,inner sep=5pt,fill=white,text width=\dimexpr\textwidth-1.5cm\relax,align=left] at (0,0) {\textbf{5-hmU and chromatin accessibility distribution in dinoflagellates}};}}
\setlength{\footskip}{35pt}
\fancypagestyle{plain}{\pagestyle{fancy}}

\makeatletter
\def\@biblabel#1{\@ifnotempty{#1}{#1.}}
\makeatother

\newcommand{\filllastline}[1]{
\setlength\leftskip{0pt}
\setlength\rightskip{0pt}
\setlength\parfillskip{0pt}
#1}

\newenvironment{Figure}
{\par\medskip\noindent\minipage{\linewidth}}
{\endminipage\par\medskip}

\title{\bf The landscape of the histone-organized chromatin of Bdellovibrionota bacteria}
\renewcommand\Authfont{\scshape\normalsize}
\author[1,$\#$]{Georgi K. Marinov}
\author[1]{Benjamin Doughty}
\author[1,2]{Anshul Kundaje}
\author[1,3,4,5]{William J. Greenleaf}
\renewcommand\Affilfont{\itshape\small}
\affil[1]{Department of Genetics, Stanford University, Stanford, California 94305, USA}
\affil[2]{Department of Computer Science, Stanford University, Stanford, California 94305, USA}
\affil[3]{Center for Personal Dynamic Regulomes, Stanford University, Stanford, California 94305, USA}
\affil[4]{Department of Applied Physics, Stanford University, Stanford, California 94305, USA}
\affil[5]{Chan Zuckerberg Biohub, San Francisco, California, USA}
\affil[$\#$]{Corresponding author}
\date{}

\begin{document}
\maketitle

% \centerline{}
% \centerline{}
\begin{abstract}

\noindent {\normalsize \textbf{Histone proteins have traditionally been thought to be restricted to eukaryotes and most archaea, with eukaryotic nucleosomal histones deriving from their archaeal ancestors. In contrast, bacteria lack histones as a rule. However, in recent years histone proteins have been identified in a few bacterial clades, in particular the phylum Bdellovibrionota, and these histones have been proposed to exhibit a range of divergent features compared to histones in archaea and eukaryotes. However, no experimental functional genomic studies of the properties of Bdellovibrionota chromatin have been carried out. In this work, we map the landscape of chromatin accessibility, active transcription and three-dimensional genome organization in a member of Bdellovibrionota (a \textit{Bacteriovorax} strain). We find that \textit{Bacteriovorax} chromatin is characterized by preferential accessibility around promoter regions, similar to what is observed in eukaryotes with compact genomes such as yeast, and also to some archaea. As in eukaryotes, chromatin accessibility positively correlates with gene expression. Mapping active transcription through single-strand DNA (ssDNA) profiling revealed that \textit{Bacteriovorax} promoters exhibit very strong polymerase pausing, unlike in yeast, but similar to the state of mammalian and fly promoters. Finally, the \textit{Bacteriovorax} genome exists in a three-dimensional (3D) conformation analogous to that of other bacteria without histones, organized by the parABS system and along the axis defined by replication origin and termination regions. These results provide a foundation for understanding the chromatin biology of the unique Bdellovibrionota bacteria and the deep evolution of chromatin organization across the tree of life.
} 
}
\centerline{}
\centerline{}
\end{abstract}

\begin{multicols}{2}

\section*{Introduction}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig1-assembly-phylogeny-V2.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Genome assembly and annotation of \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12]}. 
(A) Circos\cite{Krzywinski2009} plot of the \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12] chromosome. Forward- and reverse-strand protein coding genes are shown in red and blue respectively in the inner tile layer; ribosomal RNAs and tRNAs are shown in the outer tile layers. The red histogram shows the cumulative GC-skew\cite{Grigoriev1998}; with the inflection points corresponding to the replication origins and and termini\cite{Lobry1996}.
(B) Maximum likelihood 16S rRNA tree (generated using RAxML-NG\cite{Kozlov2019}) of members of the Bdellovibrionota phylum and the phylogenetic location of strain ICPB 3264 [H-I A3.12]. 
(C) Multiple sequence alignment of histone proteins identified in select \textit{Bacteriovorax} and \textit{Bdellovibrio} genomes. In contrast to the singlet histone folds in \textit{Bdellovibrio}, the histone gene in \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12] is a histone doublet; the two histone folds are shown separately (top two rows).
(D) Predicted\cite{AlphaFold} protein structure of the histone gene in \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12].
}
\label{Fig1}
\end{figure*}

Histones and nucleosomal chromatin are one of the defining features of the broadest divisions of life on our planet. Nearly all eukaryotes\cite{Marinov2015dino} package their chromatin around nucleosomal particles, composed of two dimerized tetramers of the four core histones H2A, H2B, H3 and H4, and histones are the most conserved proteins in their genomes\cite{Postberg2010}, especially when it comes to the key residues playing a vital role in the so called ``histone code''\cite{Jenuwein2001} (and through it in all aspects of chromatin biology -- transcription, gene regulation, DNA repair, DNA replication, mitosis and meiosis, and many others), which are almost universally invariant across all eukaryotes. 

Eukaryotic histones trace their ancestry to Archaea\cite{Woese1977}, which is the half of the prokaryote diversity from within which eukaryotes appear to have emerged, and where the informational processing systems of eukaryotes are thought to derive from\cite{Lake1984,Lake1988,Cox2008,Spang2015,Koonin2010,Koonin2014}. However, archaeal histones are quite different from those of eukaryotes. While they all contain the same cannonical histone fold domain\cite{Sandman2006}, consisting of three alpha helices\cite{Arents1991} dimerizing with another histone molecule, archaeal histones only contain the histone fold domain\cite{Henneman2018,Stevens2020} while those of eukaryotes have long tails (that are the primary sites of post-translational modifications). Another major difference is that unlike the eukaryotic octamer nucleosome, archaeal histone dimers can oligomerize into so called ``hypernucleosomes''\cite{Maruyama2013,Mattiroli2017,Bowerman2021,Henneman2021}, which are stacks of individual histone dimers, and can be of variable length.

Bacteria are the other main branch of prokaryotes and are generally thought to not have histones. However, recent phylogenomic and experimental studies have revealed that some bacteria in fact do possess histone proteins\cite{Alva2019,Schwab2023}. Most notably in the bacterium \textit{Bdellovibrio bacteriovorus} and other members of the Bdellovibrionota phylum, histones are both major in abundance and essential for viability components of the nucleoid\cite{Hocher2023,Hu2023}. Bdellovibrionota histones have been shown to bend DNA as dimers and in a sequence-independent manner\cite{Hu2023}, while other studies have suggested that they exhibit yet another major deviation from the conventional eukaryote state of nucleosomal architecture by binding to DNA head-on, coating it, as opposed to the situation in eukaryotes and archaea, where DNA is wrapped around histones\cite{Hocher2023}. 

\begin{figure*}
\begin{center}
\includegraphics[width=16cm]{Fig2-ATAC.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf The accessible chromatin landscape in \textit{Bacteriovorax} }. 
(A) ATAC-seq fragment length distribution.
(B) ATAC-seq metaprofile around annotated \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12] TSSs.
(C) Circos plot of the global accessibility distribution along the \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12] chromosome.
(D-E) Representative snapshots of local ATAC-seq signal distribution in \textit{Bacteriovorax} (exponentially growing culture).
(F) V-plot of ATAC-seq fragment distribution around annotated \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12] TSSs.
}
\label{Fig2}
\end{figure*}

These observations pose a long list of tantalizing questions about the physical organization of the genomes of these histone-possessing bacteria, about the relationship between chromatin structure and gene expression, and about how such features compare to those of archaea. However, while archaea have received some experimental attention in the past decade\cite{Ammar2012,Nalabothula2013,Badel2022,Marinov2022Haloferax}, these properties have not been assayed so far with functional genomic tools in the bacterial clades that have histones. 

In this study, in order to begin addressing these outstanding issues, we have mapped chromatin accessibility using ATAC-seq\cite{ATAC} (Assay for Transposase-Accessible Chromatin using sequencing),% and NOMe-seq/dSMF\cite{Kelly2012,Krebs2017} (Nucleosome Occupancy and Methylome sequencing/dual Single-Molecule footprinting)
transcriptional activity using KAS-seq\cite{Wu2020} (Kethoxal-assisted single-stranded DNA sequencing), and three-dimensional genome organization using Hi-C\cite{Lieberman2009} in a member of the Bdellovibrionota phylum (a \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12] strain). Our data reveals a chromatin accessibility landscape reminiscent of that of eukaryotes with highly compact genomes, with preferentially accessible promoter regions, strong polymerase pausing at promoters, positive correlation between promoter accessibility and active transcription rates, and conventional for bacteria three-dimensional organization. We discuss these properties in the broad context of the diversity of life on our planet.

\section*{Results}

\subsection*{Genome assembly and annotation of \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12]}

We first set out to map the chromatin accessibility landscape in \textit{Bdellovibrio bacteriovorus} using ATAC-seq, given that \textit{Bdellovibrio} histones were the ones characterized by recent studies of bacterial histones\cite{Hocher2023} and \textit{Bdellovibrio bacteriovorus} is the most well-studied Bdellovibrionota representative. However, a major technical challenge to the application of ATAC-seq to \textit{Bdellovibrio} is presented by the unique biology of these bacteria. \textit{Bdellovibrio} is the most famous example of a predatory prokaryote\cite{Stolp1963,Stolp1965,Lovering2021} -- it feeds by attaching onto the cell wall of gram-negative bacteria, creating an opening into it, then inserting itself into the periplasmic space between the inner and outer bacterial membranes, breaking down the host cell, subsequently undergoing polyploid division, and eventually lysing the remnants of the host. This means that \textit{Bdellovibrio bacteriovorus} is usually grown together with another prey species, which are bacteria without histones. 

This is problematic in terms of applying ATAC-seq to \textit{Bdellovibrio} because the basic principle behind ATAC-seq is the strong preference of the Tn5 transposase for open chromatin not protected by nucleosomes\cite{ATAC}. In mammalian cells, this can result in extreme overrepresentatin of mitochondrial DNA sequences in final libraries if mitochondria have not been properly filtered out during the nuclei isolation procedure. In a mixture of bacteria without and with histones, if the histones of the latter are strongly protecting DNA from Tn5 insertion, ATAC-seq libraries could easily consist almost entirely of fragments from the prey rather than \textit{Bdellovibrio}.

Fortunately, prey-independent strains of \textit{Bdellovibrio} are also available, allowing axenic growth in media free of other bacteria. We obtained ``\textit{Bdellovibrio bacteriovorus}'' strain ICPB 3264 [H-I A3.12] from ATCC and carried out ATAC-seq on it. However, almost no reads from these libraries aligned to any of the previously sequenced Bdellovibrionota strains available in the NCBI databases (neither \textit{Bdellovibrio} nor strains from \textit{Bacteriovorax}, which is the other major division of the phylum). 

We thus carried out \textit{de novo} genome sequencing of the strain we worked with, using a combination of nanopore long reads and short Illumina reads (see the Methods section for details). This resulted in a single contig of length 4,148,738 bp ($\sim$667$\times$ coverage) predicted to encode (see Methods for details) 4,127 protein coding genes (Figure \ref{Fig1}A). 

Analysis of potential DNA modifications revealed the existence of 5-methylcytosine methylation in a CpGGCC context. This is likely associated with a restriction modification (R-M) system\cite{Tock2005} in \textit{Bacteriovorax}.

We then established the precise phylogenetic positioning of the strain by analyzing available 16S rRNA sequences for Bdellovibrionota strains. The phylum Bdellovibrionota\cite{Oren2021} includes three major clades/classes -- Bacteriovoracia, Bdellovibrionia, and Oligoflexia. Bacteriovoracia includes the \textit{Bacteriovorax}, \textit{Halobacteriovorax} and \textit{Peredibacter} genera while Bdellovibrionia features \textit{Bdellovibrio} and \textit{Pseudobdellovibrio}. Our phylogenetic analysis points to strain ICPB 3264 [H-I A3.12] beloning to the \textit{Bacteriovorax} genus, being most closely related to \textit{Bacteriovorax} sp. B1T4-F (Figure \ref{Fig1}B), even though it was originally labeled as a ``\textit{Bdellovibrio}'' strain. We refer to it as \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12] or simply \textit{Bacteriovorax} in the subsequent text.

Finally, we examined putative \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12] histone genes in order to confirm their presence in its genome, and found one histone protein to be encoded in our assembly. Unlike \textit{Bdellovibrio} histone genes, which encode histone singlets that later form dimers, in \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12] the histone gene encodes a histone doublet that presumably functions as a dimer on its own (Figure \ref{Fig1}C--D).

\begin{figure*}
\begin{center}
\begin{minipage}[c]{0.80\linewidth}
\includegraphics[width=14.8cm]{Fig4-KAS.png}
\end{minipage}\hfill
\begin{minipage}[c]{0.20\linewidth}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf The ssDNA and active transcription landscape in \textit{Bacteriovorax} }. 
(A) Circos plot of the global KAS-seq signal distribution along the \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12] chromosome.
(B-C) Local KAS-seq signal distribution in \textit{Bacteriovorax} shows strongly localized peaks associated with promoter regions
(D) Metaplot of KAS-seq and ATAC-seq signal distribution along  \textit{Bacteriovorax} genes.
(E) KAS-seq and ATAC-seq profiles around the ribosomal RNA locus of \textit{Bacteriovorax}.
(F) KAS-seq and ATAC-seq profiles around the heat shock protein \textit{GrpE2} gene.
(G) KAS-seq and ATAC-seq profiles around the \textit{LuxR2} transcriptional activator gene.
(H) KAS-seq and ATAC-seq profiles near the origin of replication of the \textit{Bacteriovorax} chromosome.
}
\label{Fig4}
\end{minipage}
\end{center}
\end{figure*}

\subsection*{ATAC-seq reveals the accessible chromatin landscape in \textit{Bacteriovorax}}

In order to map the chromatin accessibility landscape in \textit{Bacteriovorax}, we adapted the previously described bacterial ATAC (bac-ATAC) protocol\cite{Melfi2021}, which involves crosslinking with 1\% formaldehyde and permeabilization of the cell wall with lysozyme (see the Methods section for details). 

In metazoans, ATAC-seq libraries display a clear nucleosomal signature, with a strong subnucleosomal peak ($\leq$120 bp) followed by a robust mononucleosomal ($\sim150$ bp) peak, a weaker dinucleosomal peak, and so on. In \textit{Bacteriovorax}, we do not observe a nucleosomal protection pattern but only a single broad fragment peak centered between 100 bp and 200 bp (Figure \ref{Fig2}B). Thus \textit{Bacteriovorax} histones do not appear to confer the same arrayed local protection to DNA as eukaryotic ones.

Accessibility is centered around the promoter regions between genes (Figure \ref{Fig2}B-E). We observe transcription start sites (TSS) scores (indicating the level of enrichment over promoter regions relative to flanking regions; see the Methods section and previous detailed discussions\cite{MarinovShipony2021}) in the 1.35-1.40 neighborhood, which is comparable to what we had previously found in the archaeon \textit{Haloferax volcanii}\cite{Marinov2022Haloferax} and also to the lower end what is usually seen in eukaryotes with highly compact genomes such as yeast\cite{Shipony2020}. In contrast, naked genomic DNA controls exhibit slight depletion around TSS (Figure \ref{Fig2}B), confirming the non-artefactual origin of the observed enrichment. Manual inspection of ATAC-seq profiles along the genome confirms these global observations, but also shows frequent cases of elevated accessibility extending into gene bodies (Figure \ref{Fig2}D-E). 

We also note that a previous ATAC-seq study of conventional bacteria that do not possess histones (\textit{Caulobacter crescentus}) reported the existence of broad domains of elevated and decreased accessibility spanning hundreds of kilobases\cite{Melfi2021}. This is not readily observed in \textit{Bacteriovorax} (Figure \ref{Fig2}C), just as it is not observed in the archaeon \textit{Haloferax volcanii}.

Finally, we asked whether patterns of nucleosomal positioning are observed around promoters using V-plot analysis\cite{Henikoff2011}. With the caveat that we do not yet have absolutely precise TSS annotations for \textit{Bacteriovorax}, we do not observe strongly positioned nucleosomes around its TSSs (Figure \ref{Fig2}F).

% \hl{-- ChromBPNet}

% \subsection*{Absolute DNA occupancy/protection levels in \textit{Bacteriovorax} }

% -- TSS metaprofile
% -- single-molecule plots
% -- is there any clustering?

% \begin{figure*}
% \begin{center}
% \includegraphics[width=18.25cm]{Fig3-SMF.png}
% \end{center}
% \captionsetup{singlelinecheck=off,justification=justified}
% \caption{
% {\bf \hl{XXX FIGURE 3 SMF XXX} }. 
% (A) 
% (B)
% (C) 
% (D) 
% }
% \label{Fig3}
% \end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=17cm]{Fig5-diff.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Relationship between chromatin accessibility and transcriptional activity in \textit{Bacteriovorax}}. 
(A) Starvation of \textit{Bacteriovorax} cells induces changes in the chromatin accessibility and active transcription landscapes. Show are three independent KAS-seq and ATAC-seq replicates for each condition.
(B) Differential accessibility and KAS levels between exponentially growing and starved (HEPES) cells.
(C) Global ATAC-seq and KAS-seq profiles over \textit{Bacteriovorax} genes under exponentially growing and starvation conditions.
(D) Correlation between ATAC-seq and KAS-seq signal over TSSs and gene bodies.
}
\label{Fig5}
\end{figure*}


\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig6-operons.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Chromatin accessibility and active transcription levels in \textit{Bacteriovorax} operons }. 
(A-H) Genome browser snapshots of ATAC-seq and KAS-seq levels in exponentially growing and starved (HEPES) cells for eight different \textit{Bacteriovorax} operons.
}
\label{Fig6}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig7-Hi-C-V2.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Three-dimension organization of the the \textit{Bacteriovorax} sp. ICPB 3264 [H-I A3.12] chromosome}. 
(A) Hi-C map of exponentially growing \textit{Bacteriovorax} cells.
(B) Hi-C map of starved (HEPES) \textit{Bacteriovorax} cells.
}
\label{Fig7}
\end{figure*}

\begin{figure*}[!hb]
\begin{center}
\includegraphics[width=18.5cm]{Fig8-comparison.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Comparison of the chromatin landscape across the deep organismal diversity}. Shown is the presence of histones, histone-based chromatin, the basic genomic properties, i.e. genome size, gene length, gene density/intergenic space size, and the extent of concentration of ATAC-seq signal around promoters (``TSS score'', see the Methods section for details).
}
\label{Fig8}
\end{figure*}

\subsection*{The ssDNA and active transcription landscape in \textit{Bacteriovorax}}

Next, we mapped the landscape of active transcription in \textit{Bacteriovorax}. To this end we employed the KAS-seq assay\cite{Wu2020}, which strongly and specifically enriches for ssDNA structures by labeling exposed guanine (G) bases with N$_3$-kethoxal, followed by biotin labeling through a click reaction, fragmentation of DNA, and streptavidin pulldown (see the Methods section for details). While KAS-seq labels all ssDNA structures (G-quadruplexes, replication intermediates, etc.), the most abundant by a large distance source of ssDNA in the cell are transcriptional bubbles associated with RNA polymerases directly engaged with DNA, both paused and actively elongating\cite{Wu2020}, thus KAS-seq datasets are an excellent way to map active transcription in a simple and efficient way. 

As with ATAC-seq, we do not observe broad large-scale domains of active transcription along the \textit{Bacteriovorax} chromosome in our KAS-seq datasets (Figure \ref{Fig4}A). The global landscape is largely uniform, punctuated by strong localized peaks. 

Strikingly, examination of local browser tracks  (Figure \ref{Fig4}B-C) revealed these peaks to be associated with promoters, indicating the existence of very strong promoter pausing in \textit{Bacteriovorax}. Polymerase pausing and its later controlled release are decisive steps in gene regulation in many eukaryotes\cite{Core2019}, classically having been first described in the fruit fly \textit{Drosophila melanogaster}\cite{Gilmour1986,Rougvie1988,Muse2007,Nechaev2010}, and also being widespread in mammals\cite{Kwak2013,Williams2015}, most, though not all metazoans, and some plants, but curiously absent in yeast and \textit{Arabidopsis thaliana}\cite{Chivu2023}. Bacteria do exhibit sequence-dependent polymerase pausing but this has been primarily reported in the context of transcription elongation and termination\cite{Kassavetis1981,Yanofsky1981,Winkler1981,Lau1983,Kireeva2009,Kang2019} and \textit{E. coli} promoters are not characterized by the typical peaks associated with pausing in global run-on GRO-seq datasets\cite{Chivu2023}. The strong KAS-seq peaks we observe are not due to transcription termination-associated pausing because they are found in all possible orientations of gene pairs, including between two divergent promoters  (Figure \ref{Fig4}B-C).

Of note, we had previously observed promoter pausing using KAS-seq in the euryarchaeon \textit{Haloferax volcanii}\cite{Marinov2022Haloferax}. \textit{Bacteriovorax} promoters are even more accentuated in KAS-seq profiles than in \textit{Haloferax} -- promoter KAS signal peaks at $\sim$12 RPM (Reads Per Million mapped reads) over \textit{Bacteriovorax} promoters compared to a through at $\sim$10 RPM in intergenic space and $\sim$8 over gene bodies  (Figure \ref{Fig4}B-D); in \textit{Haloferax} these values are $\sim$11, $\sim$10 and $\sim$8 RPM, respectively\cite{Marinov2022Haloferax}.

The most strongly transcribed (as assessed by KAS-seq levels over the gene body) genes in \textit{Bacteriovorax} are the two ribosomal RNAs (Figure \ref{Fig4}E). Notably, these do not exhibit a KAS-seq peak in their promoter (although a very strong such peak is located downstream of the ribosomal RNA operon). This is also one of the most accessible over gene bodies loci in the genome as measured by ATAC-seq; this points to an analogous situation to the one well known from yeast and other eukaryotes. Yeast rDNA exists in two states -- a transcriptionally inactive chromatinized condition on one hand, and a highly transcriptionally active, almost entirely devoid of histones state\cite{Shipony2020,Conconi1989,Merz2008} on the other. These observations also contrast with those from ATAC-seq for \textit{Caulobacter crescentus}, which does not posses histones, and in whose genome ribosomal RNA clusters were reported to be one of a handful of highly transposase-inaccessible transcribed regions\cite{Melfi2021} (HINTs). 

Other genes where particularly strong putative paused RNA polymerase peaks are observed include the \textit{GrpE2} heat shock protein (Figure \ref{Fig4}F), a \textit{LuxR2} transcriptional activator gene  (Figure \ref{Fig4}G), the chaperone protein ClpB ATP-dependent unfoldase (Supplementary Figure \ref{FigS1}A), the RNA Polymerase Sigma Factor RpoH1 (Supplementary Figure \ref{FigS1}B), and others. For each of these proteins, especially the heat shock ones, it is plausible that polymerase pausing is a regulatory mechanism specifically designed to enable their very rapid activation, in a manner analogous to heat shock genes in \textit{Drosophila} where the phenomenon was originally described\cite{Gilmour1986,Rougvie1988}.

The strongest ATAC-seq peak in the genome is found in an unusually large integenic space without any annotated genes located between replication-related genes and near the origin of replication (Figure \ref{Fig4}H). This locus exhibits only a modest ssDNA peak, and may possibly be associated with replication initiation.

\subsection*{Chromatin accessibility correlates with transcriptional activity in \textit{Bacteriovorax}}

Our next step was to characterize the relationship between chromatin accessibility and transcriptional activity in \textit{Bacteriovorax} as well as their dynamics upon large-scale gene expression perturbations. Genome-wide gene expression dynamics in these organisms has not been studied extensively in the literature, but previously it was reported that starvation of \textit{Bdellovibrio} cells for 4 hours in HEPES buffer does result in major changes in gene expression\cite{Dwidar2017}. 

We carried out ATAC-seq and KAS-seq in multiple replicates from the same cells and at the same time in both exponentially growing and HEPES-starved cultures, and indeed observed large-scale changes in transcriptional activity and chromatin accessibility (Figure \ref{Fig5}A-C). Curiously, chromatin accessibility around promoters largely disappeared in starved cells, even though transcriptional activity was either unaffected at many promoters or shifted markedly in others but overall preserved similar global properties to exponentially growing cells (except for a slight shift from promoters towards presumed elongation over gene bodies for a number of genes; Figure \ref{Fig5}C). We identified 555/378 gene bodies and 488/607 TSSs with respectively statistically decreased/increased KAS-seq signal. 

Previously we found no correlation between chromatin accessibility and transcriptional activity in the archaeon \textit{Haloferax volcanii}\cite{Marinov2022Haloferax}. In contrast, ATAC-seq signal both over promoter regions and gene bodies correlates positively with KAS signal, again over both promoter regions and gene bodies (Figure \ref{Fig5}D). 

\subsection*{Frequent independent transcription initiation/regulation of individual genes within \textit{Bacteriovorax} operons}

In both bacteria and archaea functionally related genes are often organized into operons\cite{Price2006} that are transcribed together. However, evidence has accumulated over the years that certain genes within operons may be transcribed and regulated separately from the whole operon\cite{Koide2009,Babski2016,Laass2019,Grunerger2019}. KAS-seq and ATAC-seq data for the archaeon \textit{Haloferax} also demonstrated this phenomenon on a genome-wide scale -- individual promoters, discernible by strong ATAC-seq and/or KAS-seq peaks inside operons, are frequently observed in that organism, as are obviously different KAS-seq levels over different genes within a single operon\cite{Marinov2022Haloferax}. 

Bdellovibrionota are bacteria, and we aimed to determine whether similar unexpected complexity in the functional organization of operons exists in their genomes too. To this end we identified cases of clear operons (i.e. colinear genes obviously belonging to the same functional group) and examined KAS-seq and ATAC-seq profiles in both conditions that we assayed (Figure \ref{Fig6}). We observed multiple cases of both internal KAS-seq and ATAC-seq peaks inside operons (e.g. Figure \ref{Fig6}B and Figure \ref{Fig6}E). While internal KAS-seq peaks could be associated with polymerase pausing due to regulation of transcriptional elongation, RNA processing or cotranscriptional translation events, internal ATAC-seq peaks most likely correspond to independent promoters. 

\subsection*{Three-dimensional organization of the \textit{Bacteriovorax} genome}

Finally, we characterized the 3D organization of the \textit{Bacteriovorax} genome. We employed a modification of the Hi-C\cite{Lieberman2009} assay, using multiple restriction 4-cutter enzymes to digest crosslinked \textit{Bacteriovorax} chromatin in order to improve resolution in its very small and compact genome (see the Methods section for details). 

Hi-C has been previously applied to study the physical organization of several bacterial and archaeal strains, such as originally \textit{Caulobacter crescentus}\cite{Le2013,Yildirim2018} and later multiple others\cite{Trussart2017,Marbouty2014,Marbouty2015,Marbouty2017,Lioy2018,Bohm2020,Marbouty2021,Lamy-Besnier2023}. These studies have revealed two main typical features of bacterial chromosomes. In \textit{Caulobacter} and most other bacteria, Hi-C maps exhibit a prominent pattern of increased interactions perpendicular to the main diagonal and connecting the origin and the terminus of replication, reflecting a chromosome configuration driven by SMC/condensin complexes loaded at centromere-like parS sites near replication origins\cite{Wang2017}. The parABS system\cite{Austin1983a,Austin1983b,Abeles1985,Jalal2020} plays a major role in this process, with most bacteria having one or multiple highly conserved\cite{Livny2007} parS sites near their origin of replication. The second common major feature are self-interacting chromosomal interaction domains (CIDs), areas of increased local contact frequency corresponding to plectonemes arising as a result of transcription-induced supercoiling\cite{Le2013}. 

Hi-C maps of archaea differ considerably -- \textit{Sulfolobus}, which does not have histones, appears to possess three distinct origins of replication resulting in a much more complicated chromosomal configuration, and does not display CIDs\cite{Takemata2019}, while \textit{Haloferax}, which has histones (although it is not clear to what extent they package the genome), was reported to exhibit CIDs but not the perpendicular to the main diagonal feature typical to bacteria\cite{Cockram2021}.

In contrast, \textit{Bacteriovorax}, although it does possess histones, exhibits a typical large-scale bacterial organization of its chromosome, with very strong cross-diagonal interactions connecting the replication origin and termination regions (Figure \ref{Fig7}A). However, we do not observe clear evidence for plectonemic CID-like structures in our maps. HEPES-starved cells exhibit decreased strength of the cross-diagonal feature (Figure \ref{Fig7}B), likely reflecting much lower replication activity. 

% \hl{KAS/ATAC relation}

\section*{Discussion}

That bacteria with histone-based chromatin exist represents one of the most surprising and exciting discoveries in chromatin biology in recent years, coming after the assumption that histones are restricted to archaea and eukaryotes had reigned for many decades. Our study, carried out in a novel \textit{Bacteriovorax} sp. strain whose genome we assembled and annotated \textit{de novo}, presents the first direct measurements of chromatin accessibility, the distribution of active transcription and ssDNA, and three-dimensional genome conformation in such bacteria, and it allows us to chart some of the broadest trends in chromatin organization across the tree of life (Figure \ref{Fig8}). 

Like the euryarchaeon  \textit{Haloferax}, and similar to eukaryotes with nucleosomal chromatin, \textit{Bacteriovorax}'s chromatin is characterized by preferentially accessible promoter regions. The relative level of promoter accessibility is comparable to that of \textit{Haloferax}, slightly lower than yeast, and considerably lower than metazoans with much larger genomes. Likely this is a reflection of high transcriptional activity in extremely compacted genomes, i.e. actively transcribed regions temporarily lose the protection from transposase insertion conferred by histones or other packaging proteins, and the density of actively engaged polymerase molecules is higher in such cases than it is in mammals with highly bloated genomes. We see some more direct evidence for this phenomenon in the case of the rDNA operon in \textit{Bacteriovorax}, which is the most broadly accessible gene in the genome, similar to the situation with rDNA in yeast. 

Common with eukaryotes, but unlike the euryarchaeon \textit{Haloferax}, \textit{Bacteriovorax} exhibits positive correlation between chromatin accessibility and active transcription, suggesting that displacing histones from promoters might play a regulatory role in this organism. There are two observations that complicate such an interpretation. First, we do not observe a protection signature in the ATAC-seq fragment length distribution analogous to what is seen in eukaryotes and also in some archaea with histone-based chromatin\cite{Ofer2023}; it is thus not entirely clear how exactly \textit{Bacteriovorax} histones physically protect DNA and bind to it (somewhat different models have been proposed in the past year\cite{Hocher2023,Hu2023}). Second, in starved cells we observe loss of ATAC-seq enrichment over promoters but not of actively transcribing polymerases; this can be interpreted in two ways -- either promoters close upon strong stress signals, but \textit{Bacteriovorax} histones are not strongly inhibitory to polymerase activity, or, alternatively, histones are lost from chromatin, but this does not strongly affect polymerase engagement with DNA, even though histones appear to be essential in these organisms. Single-molecule mapping of chromatin accessibility\cite{Kelly2012,Krebs2017,Shipony2020} and examination of a much broader array of conditions can be expect to shed light on these questions.

Surprisingly, we also observe evidence for very strong polymerase pausing over \textit{Bacteriovorax} genes, in contrast to what has been reported for bacteria such as \textit{E. coli}\cite{Chivu2023}, and similar to what is seen in many (though not all) eukaryotes. Thus regulation of productive transcriptional elongation might be a key mechanism of modulating gene expression in bacteria with histone-based chromatin. 

We also find evidence for independent regulation of and transcription initiation from internal promoters inside \textit{Bacteriovorax} operons. This is similar to what is observed in the euryarchaeon \textit{Haloferax}.

Finally, the \textit{Bacteriovorax} genome exhibits the typical for bacteria three-dimensional organization centered on the axis defined by the replication origin and terminus. However, it does not display clear plectonemic CIDs. This is an interesting observation that might further illuminate the mechanisms of formation or lack of such domains. Strong plectonemic CIDs are rare in eukaryotes with one notable exception -- dinoflagellates, which display the largest and most pronounced such domains of any organism assayed so far, likely thanks to their unique genomic organization featuring unidirectional gene arrays many hundreds of kilobases long (thus generating extreme levels of supercoiling stress) and the also unique to them loss of histones as a main packaging component; it has been thus hypothesized that the frequent and strong interactions between nucleosomes in other eukaryotes overrides the formation of plectonemes driven by transcription-induced supercoiling, but in dinoflagellates that effect is unmasked\cite{MarinovSymb}. Conversely, if analogous such interactions are introduced in groups that normally do not feature histones and exhibit supercoiling domains, such as bacteria, it may be expected for CID-like domains to disappear. We do not observe clear CIDs in \textit{Bacteriovorax}, but they are also not always robust in all bacteria, and not seen in the non-histone carrying crenarcheaote \textit{Sulfolobus} while having been reported from \textit{Haloferax}. However, more recent evidence points to \textit{Haloferax} possessing histone genes but not actually having histone-based chromatin\cite{Sakrikar2021,Jevtic2019,Sakrikar2021,Dulmage2015}, in which case some other chromatin protein must be responsible for the high protection from Tn5 insertion measured over its genome, and other chromatin proteins might also be actively shaping the physical genome of other archaeons and bacteria. Further expanding the coverage of physical genome mapping over the deep diversity in the tree of life can be expected to resolve these and many other questions. 

\section*{Methods}

Except where explicitly indicated otherwise, data was processed using custom-written Python scripts (\burl{https://github.com/georgimarinov/GeorgiScripts})

\subsection*{Cell culture}

We obtained the strain designated as \textit{Bdellovibrio bacteriovorus} ``ICPB 3264 [H-I A3.12]'' from ATCC (\burl{https://www.atcc.org/products/25637}). As discussed in the main text, subsequent analysis revealed that this strain does not in fact belong to the \textit{Bdellovibrio bacteriovorus} clade, but is instead a \textit{Bacteriovorax} strain that for which sequence data did not previously exist.

The cells were grown in ATCC medium 526 (10.0 g Peptone, 3.0 g Yeast extract, 1.0 L distilled water, autoclaved at 121$\,^{\circ}\mathrm{C}$) at 30$\,^{\circ}\mathrm{C}$, except where otherwise indicated for the HEPES starvation condition (in which case cells were centrifuged down and the media was replaced with HEPES buffer supplemented with 2 mM CaCl$_2$ and 4 mM MgCl$_2$ as previously described\cite{Dwidar2017}).

\subsection*{Genome sequencing}

Genomic DNA (gDNA) was isolated using the NEB Monarch Genomic DNA Purification Kit (Cat \# T3010). 

For Illumina sequencing, genomic DNA libraries were generated using Tn5 transposition as previously described\cite{Marinov2022Haloferax}. Briefly, $\sim$200 ng DNA gDNA were used, with the volume increased to 22.5 $\mu$L using ultrapure H$_2$O. Transposition was carried out with 22.5 $\mu$L gDNA, 25 $\mu$L 2$\times$TD buffer (20 mM Tris-HCl pH 7.6, 10 mM MgCl$_2$, 20\% Dimethyl Formamide), and 2.5 $\mu$L Tn5, incubating at 37$\,^{\circ}\mathrm{C}$ for 15 minutes. Transposed DNA was immediately purified using the Qiagen MinElute PCR Purification Kit (Cat \# 28004), with the reaction stopped with 250 $\mu$L PB buffer, and elution in 10 $\mu$L EB buffer. Final library amplification was carried out by mixing the 10 $\mu$L eluate, 10 $\mu$L H$_2$O, 2.5 $\mu$L i5 primer, 2.5 $\mu$L i7 primer, and 25 $\mu$L NEBNext High-Fidelity 2$\times$ PCR Master Mix, using the following thermocycler program: 3 minutes at $72\,^{\circ}\mathrm{C}$, 30 seconds at $98\,^{\circ}\mathrm{C}$, 10 cycles of: $98\,^{\circ}\mathrm{C}$ for 10 seconds, $63\,^{\circ}\mathrm{C}$ for 30 seconds, $72\,^{\circ}\mathrm{C}$ for 30 seconds. Final libraries were purified using the MinElute PCR Purification Kit. 

Illumina libraries were sequenced in a 2 $\times$ 150 bp format using the Novogene sequencing service.

Nanopore sequencing was carried out using Flongle flowcells and the SQK-LSK114 library generation kit following the manufacturer's instructions. 

\subsection*{Genome assembly}

The \verb|guppy| base caller (version 6.4.6) was used for nanopore base calling  with the following settings: \verb|--flowcell| \verb|FLO-FLG114| \verb|--kit| \verb|SQK-LSK114| \verb|--recursive| \verb|--trim_strategy| \verb|none|.

Genome assembly was generated using the SPAdes assembler\cite{Bankevich2012} (version 3.15.4) in a hybrid nanopore/Illumina mode.

\subsection*{Genome annotation}

Genome annotation was carried out using the RAST web server\cite{Aziz2008}. Ribosomal RNAs and other ncRNAs were annotated using Infernal\cite{infernal} (version 1.1.1) and the RFAM database\cite{RFAM} (version 12.0) and RNAmmer\cite{RNAmmer} (version 1.2). 

\subsection*{Sequence analysis}

DNA/RNA and protein multiple sequence alignment was carried out using \cite{Edgar2004}. 

Ribosomal RNA phylogenetic trees were built using RAxML-NG\cite{Kozlov2019} (version 1.2.0) with the following settings: \verb|--model| \verb|GTR+G|, with all full-length or near full-length 16S rRNA sequences for Bdellovibrionota strains available on GenBank. 

Protein sequences were translated from the newly generated genome annotation and available genome annotations for other sequenced Bdellovibrionota strains, and then protein domains were annotated using HMMER3\cite{HMMER3} (version 3.2.1) and the PFAM database\cite{PFAM} (version 32.0)

\subsection*{Short-read 5-methylcytosine profiling}

Genomic DNA was sheared on a Covaris E220, then converted into sequencing libraries following the EM-seq protocol, using the NEBNext Enzymatic Methyl-seq Kit (NEB, Cat \# E7120L), and sequenced as 2 $\times$ 150mers on a NovaSeq X through Novogene.

Adapters were trimmed from sequencing reads using Trimmomatic\cite{trimmomatic} (version 0.36). Trimmed reads were aligned against our \textit{de novo} genome assembly using \verb|bwa-meth| with default settings. Duplicate reads were removed using \verb|picard|\verb|-tools| (version 1.99). Methylation calls were extracted using \verb|MethylDackel| (\burl{https://github.com/dpryan79/MethylDackel}). Additional analyses were carried out using custom-written Python scripts (\burl{https://github.com/georgimarinov/GeorgiScripts}). 

\subsection*{ATAC-seq experiments}

ATAC-seq experiments were carried out as previously described\cite{Melfi2021,Marinov2022Haloferax}. 

% For native ATAC-seq, cells ($\sim$10 $\times$ 10$^6$ cells as measured by OD$_{600}$) were pelleted at 13,000 RPM for 1 minute, then resuspended in 400 $\mu$L Wash Buffer (33 mM Tris-HCl pH 8.0, 20\% sucrose) and pelleted again at at 13,000 RPM for 1 minute. Lysis was then carried out by resuspending cells in 400 $\mu$L Permeabilization Buffer (33 mM Tris-HCl pH 8.0, 20\% sucrose, 25 $\mu$g/mL lysozyme) and incubating 5 minutes at $30\,^{\circ}\mathrm{C}$ in a ThermoMixer (with shaking at 1000 RPM). Cells were again pelleted at 13,000 RPM for 1 minute, resuspended in 50 $\mu$L transposition mix (25 $\mu$L 2$\times$ TD buffer, 2.5 $\mu$L Tn5, 22.5 $\mu$L ultrapure H$_2$O), and incubated at 37$\,^{\circ}\mathrm{C}$ for 15 minutes. The reaction was stopped by adding 250 $\mu$L PB Buffer and purified using the MinElute PCR Purification Kit (Qiagen, Cat \# 28006), eluting in 10 $\mu$L EB buffer. PCR was carried out by mixing the 10 $\mu$L eluate, 10 $\mu$L H$_2$O, 2.5 $\mu$L i5 primer, 2.5 $\mu$L i7 primer, and 25 $\mu$L NEBNext High-Fidelity 2$\times$ PCR Master Mix, using the following thermocycler program: 3 minutes at $72\,^{\circ}\mathrm{C}$, 30 seconds at $98\,^{\circ}\mathrm{C}$, 10 cycles of: $98\,^{\circ}\mathrm{C}$ for 10 seconds, $63\,^{\circ}\mathrm{C}$ for 30 seconds, $72\,^{\circ}\mathrm{C}$ for 30 seconds. Final libraries were purified using the MinElute PCR Purification Kit. 

Cells were fixed by adding 37\% formaldehyde (Sigma) at a final concentration of 1\% and incubating for 15 minutes at room temperature. Formaldehyde was then quenched using 2.5 M glycine at a final concentration of 0.25 M. Cells were subsequently centrifuged at 13,000 RPM for 1 minute, washed once in 1$\times$ PBS, and centrifuged again at 13,000 RPM for 1 minute. Lysis was then carried out by resuspending cells in 400 $\mu$L Permeabilization Buffer (33 mM Tris-HCl pH 8.0, 20\% sucrose, 25 $\mu$g/mL lysozyme) and incubating 5 minutes at $30\,^{\circ}\mathrm{C}$ in a ThermoMixer (with shaking at 1000 RPM). Cells were again pelleted at 13,000 RPM for 1 minute, resuspended in 50 $\mu$L transposition mix (25 $\mu$L 2$\times$ TD buffer, 2.5 $\mu$L Tn5, 22.5 $\mu$L ultrapure H$_2$O), and incubated at 37$\,^{\circ}\mathrm{C}$ for 15 minutes. The reaction was stopped with the addition of 150 $\mu$L IP Elution Buffer (1\% SDS, 0.1 M NaHCO$_3$) and 2 $\mu$L Proteinase K (Promega, Cat \# MC5005), then incubated at 65$\,^{\circ}\mathrm{C}$ overnight to reverse crosslinks. DNA was isolated by adding an equal volume of 25:24:1 phenol:chloroform:isoamyl solution, vortexing and centrifuging for 3 minutes at 14,000 rpm, then purifying the top aqueous phase using the MinElute PCR Purification Kit, eluting in 10 $\mu$L EB buffer. Libraries were generated as already described above.

\subsection*{ATAC-seq data processing}

Demultipexed FASTQ files were mapped to the \textit{de novo} \textit{Bacteriovorax} sp. genome assembly as 2$\times$36mers using Bowtie\cite{Bowtie2009} (version 1.0.1) with the following settings: \verb|-v 2| \verb|-k 2| \verb|-m 1| \verb|--best| \verb|--strata|. Duplicate reads were removed using \verb|picard|\verb|-tools| (version 1.99). 

TSS scores were calculated as the ratio of ATAC signal in the region $\pm$100 bp around TSSs versus the ATAC signal of the 100-bp regions centered at the two points $\pm$2 kbp of the TSS as previously described\cite{MarinovShipony2021}.

Peak calling was carried out using MACS2\cite{MACS2} with the following settings: \verb|-g| \verb|4150000| \verb|-f| \verb|BAM| \verb|--to-large| \verb|--keep-dup all| \verb|--nomodel|.

% \subsection*{NOMe-seq and dSMF experiments}

% NOMe-seq/dSMF experiments were carried out as previously described\cite{Shipony2020}, with some modifications. 

% Cells were crosslinked as described for ATAC-seq at 1\% formaldehyde concentration. The equivalent of $\sim$2.5M human cells, i.e. $\sim$2 $\times$ 10$^9$ \textit{Bdellovorax} cells were used as input. 

% Fixed cells were resuspended in 100 $\mu$L M.CviPI Reaction Buffer (50 mM Tris-HCl pH 8.5, 50 mM NaCl, 10 mM DTT), then treated with M.CviPI by adding 200 U of M.CviPI (NEB), SAM at 0.6 mM and sucrose at 300 mM, and incubating at $30\,^{\circ}\mathrm{C}$ for 20 minutes. After this incubation, 128 pmol SAM and another 100 U of enzyme were added, and a further incubation at $30\,^{\circ}\mathrm{C}$ for 20 minutes was carried out. For dSMF experiments, M.SssI treatment followed immediately, by adding 60 U of M.SssI (NEB), 128 pmol SAM, MgCl$_2$ at 10 mM and incubation at $30\,^{\circ}\mathrm{C}$ for 20 minutes. The reaction was stopped by adding an equal volume of Stop Buffer (20 mM Tris-HCl pH 8.5, 600 mM NaCl, 1\% SDS, 10 mM EDTA). 

% Crosslinks were reversed overnight at $65\,^{\circ}\mathrm{C}$, and DNA was isolated using the MinElute PCR Purification Kit (Qiagen, Cat \# 28006).

% Enzymatically labeled DNA was then sheared on a Covaris E220, and converted into sequencing libraries following the EM-seq protocol, using the NEBNext Enzymatic Methyl-seq Kit (NEB, Cat \# E7120L). 

% \subsection*{NOMe-seq data processing}

% Adapters were trimmed from reads using Trimmomatic\cite{trimmomatic} (version 0.36). Trimmed reads were aligned against the \textit{de novo} \hl{XXXX} genome assembly using \verb|bwa-meth| with default settings. Duplicate reads were removed using \verb|picard|\verb|-tools| (version 1.99). Methylation calls were extracted using \verb|MethylDackel| (\burl{https://github.com/dpryan79/MethylDackel}). Additional analyses were carried out using custom-written Python scripts (\burl{https://github.com/georgimarinov/GeorgiScripts}). 

\subsection*{KAS-seq experiments}

KAS-seq experiments were carried out following the previously published protocol\cite{Wu2020} with some modifications in the sequencing library generation part\cite{Marinov2022Haloferax}. 

Briefly, a 500-mM N$_3$-kethoxal solution was brought to 37$\,^{\circ}\mathrm{C}$, then added to 1 mL of culture at a final concentration of 5 $\mu$M. Cells were then incubated for 5 minutes at 30$\,^{\circ}\mathrm{C}$ in a ThermoMixer at 1000 RPM. 

Cells were then pelleted by centrifugation at 13,000 RPM for 1 minute, resuspended in 100 $\mu$L 1$\times$ PBS buffer, and DNA was immediately isolated using the Monarch Genomic DNA Purification Kit (NEB, Cat \# T3010S), with the modification that elution was carried out with 87.5 $\mu$L 25 mM K$_3$BO$_3$ solution (pH 7.0).

Biotin was clicked onto kethoxal-modified guanines by mixing 87.5 $\mu$L DNA, 2.5 $\mu$L 20 mM DBCO-PEG4-biotin (Sigma, Cat \# 760749; DMSO solution), and 10 $\mu$L 10$\times$ PBS, and incubating at 37$\,^{\circ}\mathrm{C}$ for 90 minutes.

DNA was isolated using AMPure XP beads and eluted in 130 $\mu$L 25 mM K$_3$BO$_3$ (pH 7.0), then sheared on a Covaris E220 for 120 seconds down to $\sim$150-200 bp.

Libraries were built on beads using the NEBNext Ultra II DNA Library Prep kit (NEB, Cat \# E7645L). Biotin pull down was initiated py pipetting 20 $\mu$L Dynabeads MyOne Streptavidin T1 beads (ThermoFisher Scientific, Cat \# 65306) into DNA lo-bind tubes. Beads were separated on magnet, resuspended in 200 $\mu$L of 1$\times$ TWB buffer (Tween Washing Buffer; 5 mM Tris-HCl pH 7.5; 0.5 mM EDTA; 1 M NaCl; 0.05\% Tween 20), then separated on magnet again and resuspended in 300 $\mu$L of 2$\times$ BB (Binding Buffer; 10 mM Tris-HCl pH 7.5, 1 mM EDTA; 2 M NaCl). The DNA (130 $\mu$L) was added together with 170 $\mu$L 0.1$\times$ TE buffer, and incubated at RT on rotator for $\geq$15 minutes. Beads were separated on magnet, resuspended in 200 $\mu$L of 1$\times$ TWB, and incubated at 55$\,^{\circ}\mathrm{C}$ in a Termomixer for 2 minutes with shaking at 1000 rpm. Beads were again separated on magnet and the 200-$\mu$L 55$\,^{\circ}\mathrm{C}$ TWB wash step was repeated. Beads were separated on magnet and resuspended in 50 $\mu$L 0.1$\times$ TE. 

End repair was carried out by adding 7 $\mu$L NEB End Repair Buffer and 3 $\mu$L NEB End Repair Enzyme, incubating at 20$\,^{\circ}\mathrm{C}$ for 30 minutes, then at 65$\,^{\circ}\mathrm{C}$ for 30 minutes. 

End repair was followed by adaptor ligation by adding 2.5 $\mu$L NEB Adaptor, 1 $\mu$L NEB Ligation Enhancer and 30 $\mu$L NEB Ligation Mix, incubating at 20$\,^{\circ}\mathrm{C}$ for 20 minutes, then adding 3 $\mu$L USER Enzyme and incubating at 37$\,^{\circ}\mathrm{C}$ for 15 minutes. Beads were separated on magnet, resuspended in 200 $\mu$L of 1$\times$ TWB, then incubated at 55$\,^{\circ}\mathrm{C}$ in a Thermomixer for 2 minutes with shaking at 1000 rpm. Subsequently beads were separated on magnet and resuspended in 100 $\mu$L of 0.1$\times$ TE, separated on magnet again, resuspended in 15 $\mu$L of 0.1$\times$ TE Buffer, and transfered to PCR tubes.

Beads were then incubated at $98\,^{\circ}\mathrm{C}$ for 10 minutes, and libraries were amplified by adding 5 $\mu$L of i5 primer, 5 $\mu$L of i7 primer and 25 $\mu$L of 2$\times$ Q5 Hot Start Polymerase Mix, using the following PCR program: 30 seconds at $98\,^{\circ}\mathrm{C}$; 15 cycles of $98\,^{\circ}\mathrm{C}$ for 10 seconds, $65\,^{\circ}\mathrm{C}$ for 30 seconds, and $72\,^{\circ}\mathrm{C}$ for 30 seconds; and a final extension at $72\,^{\circ}\mathrm{C}$  for 5 minutes.

Beads were separated on magnet and the final libraries were purified from the supernatant using 50 $\mu$L AMPure XP beads, eluting in 0.1$\times$ TE buffer. 

\subsection*{KAS-seq data processing}

Demultipexed FASTQ files were mapped to the \textit{de novo} \textit{Bacteriovorax} sp. genome assembly as 2$\times$36mers using Bowtie\cite{Bowtie2009} with the following settings: \verb|-v 2| \verb|-k 2| \verb|-m 1| \verb|--best| \verb|--strata|. Duplicate reads were removed using \verb|picard|\verb|-tools| (version 1.99). 

% \subsection*{Multimapping reads analysis}

% For the purpose of examining repetitive regions in the genome, such as the rRNA operons, which exist in two identical copies in the genome, and are thus not uniquely mappable, reads were mapped with the \verb|-a| option instead of \verb|-k 2| \verb|-m 1|. Normalization was carried out as previously described and discussed\cite{Marinov2015piwi}.

\subsection*{Differential accessibility/KAS-seq analysis}

The analysis of differential chromatin accessibility as measured using ATAC-seq or enriched for KAS-seq signal was carried out using DESeq2\cite{Love2014}. Read counts were calculated over promoters or gene bodies and used as input into DESeq2.

\subsection*{Hi-C experiments}

Hi-C experiments were carried out as previously described\cite{MarinovSymb,MarinovNucl} with some modifications. 

Cells were crosslinked using 37\% formaldehyde (Sigma) at a final concentration of 1\% for 15 minutes at room temperature. Formaldehyde was then quenched using 2.5 M Glycine at a final concentration of 0.25 M. Cells were centrifuged at 13,000 rpm for 2 minutes, washed once in 1$\times$ PBS, and stored at -80$\,^{\circ}\mathrm{C}$. 

Denaturation was carried out by resuspended cells in 50 $\mu$L of 0.5\% SDS and incubating at 62$\,^{\circ}\mathrm{C}$ for 10 minutes. SDS was quenched by adding 145 $\mu$L of H$_2$O and 25 $\mu$L of 10\% Triton X-100 and incubating at 37$\,^{\circ}\mathrm{C}$ for 15 minutes.

Restriction digestion was carried out by adding 25 $\mu$L of 10$\times$ CutSmart buffer and 100 U of the MluCI restriction enzyme (NEB $\#$R0538) plus 100 U of the MboI enzyme (NEB $\#$R0147),
then incubating for $\geq$2 hours at 37$\,^{\circ}\mathrm{C}$ in a Thermomixer at 900 rpm. The reaction was then incubated at 62$\,^{\circ}\mathrm{C}$ for 20 minutes in order to inactivate the restriction enzymes. 

Fragment ends were filled in by adding 37.5 $\mu$L of 0.4 mM biotin-14-dATP (ThermoFisher Scientific, $\#$ 19524-016), 1.5 $\mu$L each of 10 mM dCTP, dGTP and dTTP, and 8 $\mu$L of 5U/$\mu$L DNA Polymerase I Large (Klenow) Fragment (NEB $\#$M0210). The reaction was the incubated at 37$\,^{\circ}\mathrm{C}$ in a Thermomixer at 900 rpm for 45 minutes.

Fragment end ligation was carried out by adding 663 $\mu$L H$_2$O, 120 $\mu$L 10$\times$ NEB T4 DNA ligase buffer (NEB B0202), 100 $\mu$L of 10\% Triton X-100, 12 $\mu$L of 10 mg/mL Bovine Serum Albumin (100$\times$ BSA, NEB), 5 $\mu$L of 400 U/$\mu$L T4 DNA Ligase (NEB $\#$M0202), and incubating at room temperature for $\geq$4 hours with rotation. 

Cells were then pelleted by centrifugation at 13,000 rpm for 5 minutes. The pellet was resuspended in 200 $\mu$L Elution Buffer (1\% SDS, 0.1 M NaHCO$_3$), Proteinase K was added, and incubated at 65$\,^{\circ}\mathrm{C}$ overnight to reverse crosslinks. 

After addition of 600 $\mu$L 1$\times$TE buffer, DNA was sheared using a Covaris E220 instrument. DNA was then purified using the MinElute PCR Purification Kit (Qiagen $\#$28006), with elution in a total volume of 300 $\mu$L 1$\times$ EB buffer. 

For streptavidin pulldown of biotin-labeled DNA, 150 $\mu$L of 10 mg/mL Dynabeads MyOne Streptavidin T1 beads (Life Technologies, 65602) were separated on a magnetic stand, then washed with 180 $\mu$L of 1$\times$ TWB (Tween Washing Buffer; 5 mM Tris-HCl pH 7.5; 0.5 mM EDTA; 1 M NaCl; 0.05\% Tween 20). Beads were then resuspended in 300 $\mu$L of 2$\times$ Binding Buffer (10 mM Tris-HCl pH 7.5, 1 mM EDTA; 2 M NaCl), the sonicated DNA was added, and beads were incubated for $\geq$15 minutes at room temperature on a rotator. After separation on a magnetic stand, the beads were twice washed with 180 $\mu$L of 1$\times$ TWB, and heated at 55$\,^{\circ}\mathrm{C}$ in a Thermomixer with shaking for 2 minutes. 

Final libraries were prepared on beads using the NEBNext Ultra II DNA Library Prep Kit (NEB $\#$E7645). End repair was carried out by resuspending beads in 50 $\mu$L 1$\times$ EB buffer, and adding 3 $\mu$L NEB Ultra End Repair Enzyme and 7 $\mu$L NEB Ultra End Repair Enzyme, followed by incubation at 20$\,^{\circ}\mathrm{C}$ for 30 minutes and then at 65$\,^{\circ}\mathrm{C}$ for 30 minutes. 

Adapters were ligated to DNA fragments by adding 30 $\mu$L Blunt Ligation mix, 1 $\mu$L Ligation Enhancer and 2.5 $\mu$L NEB Adapter, incubating at 20$\,^{\circ}\mathrm{C}$ for 20 minutes, adding 3 $\mu$L USER enzyme, and incubating at 37$\,^{\circ}\mathrm{C}$ for 15 minutes. 

Beads were then separated on a magnetic stand, and washed with 180 $\mu$L TWB for 2 minutes at 55$\,^{\circ}\mathrm{C}$ at 1000 rpm in a Thermomixer. After separation on a magnetic stand, beads were washed in 100 $\mu$L 0.1 $\times$ TE buffer, then resuspended in 16 $\mu$L 0.1 $\times$ TE buffer, and heated at 98$\,^{\circ}\mathrm{C}$ for 10 minutes. 

For PCR, 5 $\mu$L of each of the i5 and i7 NEB Next sequencing adapters were added together with 25 $\mu$L 2$\times$ NEB Ultra PCR Mater Mix. PCR was carried out with a 98$\,^{\circ}\mathrm{C}$ incubation for 30 seconds and 12 cycles of 98$\,^{\circ}\mathrm{C}$ for 10 seconds, 65$\,^{\circ}\mathrm{C}$ for 30 seconds, and 72$\,^{\circ}\mathrm{C}$ for 1 minute, followed by incubation at 72$\,^{\circ}\mathrm{C}$ for 5 minutes. 

Beads were separated on a magnetic stand, and the supernatant was cleaned up using 1.8$\times$ AMPure XP beads. 

Libraries were sequenced as 2$\times$150mers on a Illumina NovaSeq X through Novogene.

\subsection*{Hi-C data processing}

Hi-C sequencing reads were processed against the \textit{de novo} \textit{Bacteriovorax} sp. assembly using the Juicer pipeline for analyzing Hi-C datasets\cite{Durand2016a} (version 1.6 of Juicer and version 2.13.07 of Juicer Tools) with default settings. 

Hi-C matrices were visualized using Juicebox\cite{Durand2016b}.

\subsection*{Data availability}

The sequencing datasets generated for and used in this study can be accessed from GEO accession \hl{XXXX}.

\section*{Author contributions}

G.K.M. conceptualized the study, carried out cell culture, and performed ATAC-seq, % NOMe-seq/dSMF, 
KAS-seq and Hi-C experiments together with B.D., analyzed the data and wrote the manuscript, with input from all authors. B.D. performed EM-seq experiments. W.J.G. and A.K. supervised the study.

\section*{Acknowledgments}

The authors would like to thank members of the Greenleaf and Kundaje labs for helpful discussions. This work was supported by NIH grants (P50HG007735, RO1 HG008140, U19AI057266 and UM1HG009442 to W.J.G., 1UM1HG009436 to W.J.G. and A.K., 1DP2OD022870-01 and 1U01HG009431 to A.K.), the Rita Allen Foundation (to W.J.G.), the Baxter Foundation Faculty Scholar Grant, and the Human Frontiers Science Program grant RGY006S (to W.J.G). W.J.G is a Chan Zuckerberg Biohub investigator and acknowledges grants 2017-174468 and 2018-182817 from the Chan Zuckerberg Initiative. 

\section*{Competing interests}

W.J.G. is a consultant and equity holder for 10x Genomics, Guardant Health, Quantapore, and Ultima Genomics, and cofounder of Protillion Biosciences, and is named on patents describing ATAC-seq. 

A.K. is a consulting Fellow with Illumina, a member of the SAB of OpenTargets (GSK), PatchBio, SerImmune and a scientific co-founder of RavelBio.

\begin{thebibliography}{100}

% \section*{References}

\input{references}

\end{thebibliography}

\end{multicols}

% \end{document}

\clearpage

\setcounter{table}{0}
\renewcommand{\tablename}{Supplementary Table}
\setcounter{figure}{0}
\renewcommand{\figurename}{Supplementary Figure}

\setcounter{page}{1}
\renewcommand\thepage{{SM }\arabic{page}}

\begin{center}
% {\LARGE \textbf{\begin{spacing}{1.1}XXXX. \\ Supplementary Materials\end{spacing} }}
{\LARGE \textbf{Supplementary Materials}}
\end{center}

% \section*{Supplementary Tables}

\section*{Supplementary Figures}

\begin{figure*}[!hb]
\begin{center}
\includegraphics[width=18.5cm]{FigS1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Additional examples of extremely strongly paused promoters in \textit{Bacteriovorax}}. 
(A) Chaperone protein ClpB ATP-dependent unfoldase
(B) RNA Polymerase Sigma Factor RpoH1.
}
\label{FigS1}
\end{figure*}


\end{document}
