\documentclass[10pt]{article}
\usepackage[hmargin=1.5cm,top=2cm,bottom=2cm]{geometry}
\usepackage{multicol}
\setlength\columnsep{15pt}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{array}
\usepackage{booktabs}
\usepackage{tabularx}
\usepackage[auth-sc]{authblk}
\usepackage{longtable}
\usepackage{multirow}
\usepackage{hyperref}
\usepackage{enumerate}
\usepackage[labelfont=bf]{caption}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage{mdframed}
\usepackage{graphics}
\usepackage{multirow}
\usepackage{rotating}
\usepackage{array}
\usepackage{lscape}
\usepackage{caption}
\usepackage{breakurl}
\usepackage{todonotes}
\usepackage{hanging}
\usepackage[final]{pdfpages}
\usepackage[leftFloats,CaptionAfterwards]{fltpage}
\usepackage[numbers,super,sort&compress]{natbib}
\setlength{\bibsep}{0pt plus 0.3ex}
\usepackage{abstract}
\usepackage{enumitem}
\usepackage{fancyhdr}
\usepackage{tikz}
\usepackage{soul}
\usepackage{titlesec}
\usepackage{fancyhdr}
\usepackage{tikz}

\titleformat{\section}[block]{\large\bfseries\filcenter}{\thesection.}{0.4em}{}
\titleformat{\subsection}[block]{\normalsize\sc\bfseries\filcenter}{\thesubsection.}{0.4em}{}
\titleformat{\subsubsection}[block]{\normalsize\sc\itshape\filright}{\thesubsection.}{0.4em}{}
\setcounter{secnumdepth}{5}

\definecolor{linen}{rgb}{0.98, 0.94, 0.9}

\usepackage{fancyhdr}
\pagestyle{fancy}
\newlength{\oddmarginwidth}
\setlength{\oddmarginwidth}{1in+\hoffset+\oddsidemargin}
\newlength{\evenmarginwidth}
\setlength{\evenmarginwidth}{\evensidemargin+1in}
\fancyfootoffset[LO,RE]{\oddmarginwidth}
\fancyfootoffset[LE,RO]{\evenmarginwidth}
\fancyhead{}
\renewcommand{\headrulewidth}{0pt}
\fancyfoot{} 
\lfoot[C]{\tikz{\node[black,outer sep=-20pt,inner sep=5pt,fill=linen,text width=\dimexpr\paperwidth\relax,align=center] at (0,0) {\thepage};}}
% \lhead[LO,RE]{\tikz{\node[black,outer sep=0pt,inner sep=5pt,fill=white,text width=\dimexpr\textwidth-1.5cm\relax,align=left] at (0,0) {\textbf{Marinov, Chen et al.}};}}
% \lhead[LE,RO]{\tikz{\node[black,outer sep=0pt,inner sep=5pt,fill=white,text width=\dimexpr\textwidth-1.5cm\relax,align=left] at (0,0) {\textbf{5-hmU and chromatin accessibility distribution in dinoflagellates}};}}
\setlength{\footskip}{35pt}
\fancypagestyle{plain}{\pagestyle{fancy}}

\makeatletter
\def\@biblabel#1{\@ifnotempty{#1}{#1.}}
\makeatother

\newcommand{\filllastline}[1]{
\setlength\leftskip{0pt}
\setlength\rightskip{0pt}
\setlength\parfillskip{0pt}
#1}

\newenvironment{Figure}
{\par\medskip\noindent\minipage{\linewidth}}
{\endminipage\par\medskip}

\title{\bf Genome-wide distribution of 5-hydroxymethyluracil and chromatin accessibility in the \textit{Breviolum minutum} genome}
\renewcommand\Authfont{\scshape\normalsize}
\author[1,*,$\#$]{Georgi K. Marinov}
\author[2,*]{Xinyi Chen}
\author[3]{Matthew P. Swaffer}
\author[4]{Tingting Xiang}
\author[1,5]{Anshul Kundaje}
\author[4]{Arthur R. Grossman}
\author[1,6,7,8,$\#$]{William J. Greenleaf}
\renewcommand\Affilfont{\itshape\small}
\affil[1]{Department of Genetics, Stanford University, Stanford, California 94305, USA}
\affil[2]{Department of Bioengineering, Stanford University, Stanford, California 94305, USA}
\affil[3]{Department of Biology, Stanford University, Stanford, CA 94305, USA}
\affil[4]{Department of Plant Biology, Carnegie Institution for Science, Stanford, California 94305, USA}
\affil[5]{Department of Computer Science, Stanford University, Stanford, California 94305, USA}
\affil[6]{Center for Personal Dynamic Regulomes, Stanford University, Stanford, California 94305, USA}
\affil[7]{Department of Applied Physics, Stanford University, Stanford, California 94305, USA}
\affil[8]{Chan Zuckerberg Biohub, San Francisco, California, USA}
\affil[*]{These authors contributed equally to this work}
\affil[$\#$]{Corresponding author}
\date{}

\begin{document}
\maketitle

% \centerline{}
% \centerline{}
\begin{abstract}

\noindent {\normalsize \textbf{In dinoflagellates, the most unique and divergent nuclear organization among the known diversity of eukaryotes has evolved. The list of highly unusual features of dinoflagellate nuclei and genomes is long -- permanently condensed liquid crystalline chromosomes, in which histones are not the main packaging component, genes organized as very long unidirectional gene arrays, general absence of transcriptional regulation, high abundance of the otherwise very rare DNA modification 5-hydroxymethyluracil (5-hmU), and many others. Most of these fascinating properties were originally identified in the 1970s and 1980s but have received very little attention in recent decades using modern genomic tools. In this work, we address some of the outstanding questions regarding dinoflagellate genome organization by mapping the genome-wide distribution of 5-hmU (using both immunoprecipitation-based and basepair-resolution chemical mapping approaches) and of chromatin accessibility in the genome of the dinoflagellate \textit{Breviolum minutum}. We find that the 5-hmU modification is preferentially enriched over certain classes of repetitive elements, and also often coincides with the boundaries between gene arrays. It is generally anti-correlated with chromatin accessibility, the levels of which are lower in those regions. We discuss the potential roles of 5-hmU in the functional organization of dinoflagellate genomes and its relationship to the transcriptional landscape of gene arrays.
}}
\centerline{}
\centerline{}
\end{abstract}

\begin{multicols}{2}

\section*{Introduction}

Dinoflagellates are perhaps the most remarkable lineage within the known eukaryote diversity in terms of their numerous extreme deviations from the typical for other eukaryotes genomic and cellular organization, especially in their highly unusual nuclei\cite{Rizzo1991,WargoRizzo2001,Rizzo2003,Hackett2004,Lin2011,Wisecaver2011}. They are also a very diverse, successful and ecologically important group that includes numerous photosynthetic lineages, free living heterotrophs and even parasites, playing a major ecological role in marine ecosystems. The best known such example is the endosymbiotic association of Symbiodiniaceae dinoflagellates\cite{LaJeunesse2018} with reef-building corals. It is the photosynthetic capability of the dinoflagellate symbionts that provides the metabolic foundation for the highly biologically diverse reef ecosystems\cite{Trench1993}, and it is the expulsion of these symbionts from their host cells upon heat stress that leads to coral ``bleaching'' and to the eventual death of coral reefs\cite{Hoegh1999}, an increasingly acute problem in the modern world due to the effects of global climate change\cite{Hoegh2007}. 

The list of highly unorthodox features of dinoflagellate nuclei is long\cite{Rizzo1972,Rizzo1991,Rizzo2003,Rizzo1987,Hackett2004}. Dinoflagellate chromosomes exist in a permanently condensed liquid crystalline state throughout most of the cell cycle, and are characterized by an unusually low DNA to protein ratio (1:10, compared to 1:1 in other eukaryotes\cite{Herzog1981,WargoRizzo2001}). This is because they have lost nucleosomal histones as the main packaging component of chromatin. That role has instead been taken over by a distinct set of proteins -- small dinoflagellate-specific virus-derived nucleoproteins (DVNP) and histone-like proteins (HLPs) -- that appear to have been acquired through horizontal gene transfer from viruses and bacteria, respectively\cite{Gornik2012,Jano2017}. This is an extreme departure from the norm for an eukaryote -- nucleosomal chromatin is otherwise universal\cite{Talbert2019}. Furthermore, the four core histones -- H2A, H2B, H3 and H4 -- are the most conserved of all eukaryotic proteins, a result not just of their role in packaging DNA, but also because they, especially in their N-terminal tails, are subject to extensive posttranslational modifications (PTMs) that serve as the basis of the so-called ``histone code''\cite{Jenuwein2001}, which plays vital roles in all chromatin-related biological processes, such as transcription, gene regulation, replication, DNA repairs and others. Dinoflagellates have not lost histones -- in fact multiple and highly diverse histone genes are retained in all dinoflagellates for which genomic data is available\cite{Marinov2015} -- but they are highly divergent from those of other eukaryotes and it is not clear what role they might play in their nuclei.

Genome organization in dinoflagellates also represents a highly derived state, as their genes are organized into long unidirectional gene arrays\cite{Bachvaroff2008,Shoguchi2013,Aranda2016,Lin2015}, presumably transcribed as a single unit. \textit{Trans}-splicing is ubiquitous in the group, with mature mRNAs generated through the addition of a spliced leader (SL) sequence\cite{Zhang2007,Slamovits2008,Bachvaroff2008}. Transcriptional regulation is thought to be largely absent, with all genes  transcribed at all times. The primary mode of gene regulation is thought to be at the level of translation and/or RNA stability.

We still know very little about the inner workings of these remarkable nuclei, and most of the numerous fascinating questions regarding how eukaryotic transcription, replication and DNA repair occur and are regulated in the absence of histones and within permanently condensed liquid crystalline chromosomes remain unanswered. Recently, we and others\cite{Marinov2021,Nand2021} began to unravel some of these mysteries by applying three-dimensional genome conformation mapping using Hi-C\cite{Lieberman2009} to the members of  Symbiodiniaceae \textit{Breviolum minutum} and \textit{Symbiodinium microadriaticum}, showing that the genome is folded into distinct topologically associating domains coinciding with pairs of divergent gene arrays and separated by the points where convergent gene arrays meet (termed ``dinoTADs''). These domains appear to be the product of strong transcription-induced supercoiling in a context of extremely long transcriptional units and the absence of histones.

In this work, we address two other unanswered questions, by mapping the genomic distribution of and outlining potential roles in dinoflagellate genome biology for chromatin accessibility and another unusual feature of their genomes -- the otherwise very rare in eukaryotes 5-hydroxymethyluracil DNA modification. 

That dinoflagellates contain 5-hmU was first discovered in the 1970s\cite{Rae1973,Rae1976,Rae1978}, and it was found that unexpectedly large fractions of thymines (T) in the genome of various species are replaced by 5-hmU -- 12\% in \textit{Exuviaella cassubica}\cite{Rae1978}, 12\% in \textit{Symbiodinium microadriaticum}\cite{Rae1978}, 37\% in \textit{Crypthecodinium cohnii}\cite{Rae1978}, 38\% in \textit{Crypthecodinium cohnii}\cite{Steele1980}, 62\% in \textit{Amphidinium carterae}\cite{Rae1978}, 62.8\% in \textit{Prorocentrum micans}\cite{Herzog1982}, and 68\% in \textit{Peridinium triquetrum}\cite{Rae1978}. What functions 5-hmU might have is not known, but it has been suggested that it enhances the flexibility and hydrophilicity of double-stranded DNA\cite{Carson2016}, especially in some sequence contexts\cite{Davies1988,Pasternack1996,Vu1999}.

Curiously, there is one other lineage of eukaryotes in which 5-hmU has also been observed in non-negligible quantities\cite{Gommer1993}, and it is the major parasitic clade Kinetoplastida. There are many parallels between the genomic organization of dinoflagellates and kinetoplastids\cite{Lukes2009} -- although kinetoplastids have conventional nucleosomal chromatin, they too have lost transcriptional regulation as a primary mechanism for controlling gene expression and their genes are also organized into long arrays\cite{Ivens2005,El-Sayed2005a,El-Sayed2005b,Berriman2005}, with mature mRNAs being the product of \textit{trans}-splicing\cite{Boothroyd1982,Nelson1983,DeLange1984,DeLange1984B,Sutton1986}. These properties are shared with other members of the larger Euglenozoa lineage that have been studied, such as \textit{Euglena gracilis}\cite{Dooijes2000}. However, in kinetoplastids 5-hmU appears to be simply a precursor in the synthesis of the larger modification $\beta$-D-Glucopyranosyloxymethyluracil, better known as base J\cite{Gommer1991,Gommer1993b,Borst2008}, which does play a significant role in their genomes. Base J replaces about 1\% of thymines and is predominantly found in repetitive DNA, especially in telomeric regions\cite{vanLeeuwen1998,vanLeeuwen1997,vanLeeuwen1996}, but more importantly, it also demarcates the boundaries between gene arrays\cite{Cliffe2010} and likely prevents transcriptional readthrough events\cite{vanLuenen2012,Reynolds2014}. \textit{Euglena} has base J too\cite{Dooijes2000}.

The chromatin accessibility landscape in dinoflagellates has also not been mapped previously. While it is clear that nucleosomes are not the main packaging component of their chromatin, it is not known whether DVNPs and/or HLPs might provide similar levels of physical protection of DNA, and also whether there might be regions of the genome characterized by increased or reduced accessibility.

To answer these questions, we mapped the 5-hmU distribution in the genome of \textit{B. minutum}, finding that it is enriched over certain repetitive element classes and often around the boundaries between gene arrays. In contrast, chromatin accessibility is anti-correlated with elevated 5-hmU levels; this inverse relationship is specifically strong around gene array/dinoTAD boundaries, pointing to potential localization of histones (or other proteins that protect DNA) to regions enriched for 5-hmU (and thus conferring them greater protection from transposase insertion). We do not detect increased accessibility associated with transcription start sites (TSSs), and generally we do not observe strongly localized DNA accessibility peaks in the genome comparable to those in metazoans. These results provide a foundation for the future detailed understanding of the organization of transcription in dinoflagellates and its interplay with DNA modifications.
	
% \end{multicols}
% \renewcommand{\footnotesize}{\fontsize{10pt}{12pt}\selectfont}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Mapping the 5-hmU and chromatin accessibility landscape in \textit{B. minutum}}. 
(A) Proportion of human and \textit{B. minutum} in 5-hmU pull down and control libraries. A mixture of human and dinoflagellate DNA was used as input to MeDIP-seq experiments, and the fraction of reads that map to each genome is shown. The 5-hmU MeDIP-seq library is enriched for dinoflagellate reads confirming the specificity of 5-hmU pull down.
(B) Proportion of multimapping reads in 5-hmU pull down and control libraries. The 5-hmU MeDIP-seq library exhibits higher fraction of multimapping reads suggesting that 5-hmU may be enriched over repetitive elements.
(C) Metaprofiles of 5-hmU and control libraries signal over \textit{B. minutum} transcription start sites/gene starts. 
(D) Basepair-resolution chemical mapping of 5-hmU does not reveal a sequence motif associated with the modification in \textit{B. minutum}.
(E) Fragment length distribution of \textit{B. minutum} ATAC-seq datasets. Shown are uniquely mapping reads alone as well as all reads that can be mapped.
(F) Proportion of multimapping reads in \textit{B. minutum} ATAC-seq datasets as well as a control genomic DNA (gDNA) library. 
(G) Metaprofiles of ATAC-seq signal over \textit{B. minutum} transcription start sites/gene starts as well as the gDNA control. 
(H) Distribution of ATAC-seq peaks relative to annotated genomic features.
(I) Differential accessibility analysis for the 27$\,^{\circ}\mathrm{C}$ and 34$\,^{\circ}\mathrm{C}$ conditions.
% \rightline{\textit{(legend continued on next page)}}
} 
\label{Fig1}
\end{figure*}
% \clearpage
% \let\thefootnote\relax\footnotetext{

% \begin{multicols}{2}

\section*{Results}

\subsection*{Mapping the 5-hmU and chromatin accessibility landscape in \textit{B. minutum}}

In order the map the distribution of 5-hmU in the \textit{B. minutum} genome, we first adapted the MeDIP (Methylated DNA ImmunoPrecipitation) protocol\cite{Weber2005} for mapping DNA methylation using high-throughput sequencing (MeDIP-seq\cite{Down2008}). We used an antibody specific to 5-hmU (see Methods) and utilized a spike-in control to confirm that it specifically enriches for 5-hydoxymethyluracil. As mammalian genomes do not contain appreciable amounts of 5-hmU, we used a mixture of human and \textit{B. minutum} genomic DNA (gDNA) as input to the MeDIP procedure, and we also sequenced three different controls -- input DNA, ``depleted'' DNA (the supernatant remaining after the immunoprecipitation step), and an IgG control (using only beads with no primary antibody). We observed that the fraction of human reads decreased $\sim$2$\times$ after 5-hmU MeDIP relative to controls (Figure \ref{Fig1}A), confirming the specific enrichment of dinoflagellate DNA. We also made an interesting observation -- 5-hmU MeDIP is also $\sim$2$\times$ enriched for multimapping reads compared to the controls (Figure \ref{Fig1}B), hinting that 5-hmU might be preferentially associated with repetitive elements. 

We did not observe enrichment or 5-hmU around the starting positions of genes (Figure \ref{Fig1}C); there is in fact a slight depletion in the $\pm$1-kb region around them (note that the three spikes observed in the plot are an artefactual result due to the presence of collapsed repeats in the current \textit{B. minutum} assembly; see further discussion on this topic below).

We also deployed an orthogonal method for mapping 5-hmU at base-pair resolution using chemical conversion of 5-hmU into cytosine C (see the Methods section for details). In contrast to the 5mC modification in mammals, which is found specifically in a CpG context, we do not find any sequence preference for T bases modified into 5-hmU in \textit{B. minutum} (Figure \ref{Fig1}D). We note that the protocol we used does not provide for 100\% conversion rate of 5-hmU modified bases and it is thus at present not possible to estimate the absolute levels of 5-hmU in the \textit{B. minutum} genome based on chemical mapping data alone.

In order to map the \textit{B. minitum} chromatin accessibility landscape, we utilized ATAC-seq\cite{ATAC} (Assay for Transposase-Accessible Chromatin using sequencing), specifically in its omniATAC\cite{omniATAC} modification (see Methods). We generated a very deeply sequenced ($\sim$130 million mapped reads) library from actively growing cells as well as two replicates each for cells grown at the usual temperature of 27$\,^{\circ}\mathrm{C}$ and heat stressed cells incubated at 34$\,^{\circ}\mathrm{C}$.

In eukaryotes with nucleosomal chromatin, ATAC-seq libraries sequenced in a paired-end format on Illumina sequencers display a characteristic nucleosomal signature in their fragment length distribution, with a subnucleosomal peak at $\leq \sim$120 bp, a prominent mononucleosomal peak, and a weaker dinucleosomal peak. \textit{B. minutum} ATAC-seq only displays a peak at short fragment lengths ($\sim$ 60 bp), with no nucleosomal peaks (Figure \ref{Fig1}E). Thus, we conclude that wherever they are found in the genome, nucleosomes apparently are of too low abundance to substantially affect the overall fragment length distribution, while DVNPs and HLPs do not form structures consisting of multiple closely positioned proteins that strongly protect against transposition. Intriguingly, we observe a modest depletion of multimapping reads in ATAC-seq libraries relative to a matched naked gDNA control (Figure \ref{Fig1}F), i.e. the opposite trend of that observed for MeDIP-seq. ATAC-seq signal is also not enriched around gene start positions (Figure \ref{Fig1}G). 

Genome browser inspection of ATAC-seq and gDNA controls (Supplementary Figure \ref{FigS6} and \ref{FigS7}) revealed that the available \textit{B. minutum} assembly includes multiple collapsed repeats, i.e. in reality multi-copy sequences that are only present in the assembly as a single copy (or as many fewer copies than their actual abundance in the genome). This complicates interpretation of sequencing datasets as these regions appear as artificial ``peaks'' if analysis is not carried out against a proper control. Therefore, we performed all subsequent analysis as a comparison against matched input or negative gDNA controls. Peak calling also did not show a concentration of called peaks around gene starts/TSSs (Figure \ref{Fig1}H). We also note that called ATAC-seq peaks show overall lower enrichment over background/controls than those in human ATAC-seq datasets\cite{ENCODE2020} (Supplementary Figure \ref{FigS5}), i.e. we do not really observe strongly localized chromatin accessibility as in other eukaryote genomes. Comparing the heat stressed (34$\,^{\circ}\mathrm{C}$) and normal temperature (34 $\,^{\circ}\mathrm{C}$) conditions did not reveal large scale changes in the chromatin accessibility landscape (Figure \ref{Fig1}I).

\subsection*{Effect of exogenous expression of DVNPs on chromatin accessibility in the yeast \textit{S. cerevisiae}}

Previous studies had examined the effect of DVNPs on chromatin structure by exogenously expressing a DVNP (\textit{Hematodinium sp.} DVNP.5) in the yeast \textit{Saccharomyces cerevisiae}\cite{Irwin2018}. Assaying the resulting changes in the chromatin landscape using MNase-seq was reported to reveal nucleosome disruption, while overall the expression of the DVNPs had a negative effect on cell growth, likely because it impaired transcription. We sought to replicate and expand on these results by expressing several DVNPs in \textit{S. cerevisiae} and carrying out ATAC-seq as well as single-molecule footprinting (SMF\cite{Kelly2012,Krebs2017}; providing information about the absolute levels of accessibility/protection along the genome). 

We exogenously expressed (see Methods) three different DVNPs in yeast -- the previously assayed \textit{Hematodinium sp.} DVNP.5 as well as \textit{Hematodinium sp.} DVNP.12 and \textit{B. minutum} DVNP symbB.v1.2.006931. \textit{Hematodinium sp.} DVNP.5 had a negative effect on growth as previously reported, but much more strongly so than the other two. We carried out ATAC-seq and SMF using an endogenous control in all experiments -- \textit{Candida albicans} cells, which we used to account for internal experimental variation, as previously described\cite{Swaffer2021}. 

ATAC-seq did not reveal dramatic changes in the accessibility landscape upon DVNP expression (Supplementary Figures \ref{FigS1} and \ref{FigS2}) except perhaps for a slight decrease in the height of some peaks. On the the other hand, SMF data showed a decrease in accessibility around TSSs and reduced strength of nucleosome positioning (Supplementary Figure \ref{FigS1}), broadly consistent with the previous MNase-seq results suggesting nucleosome disruption induced by DVNPs\cite{Irwin2018}.

\subsection*{Inverse correlation between 5-hmU and chromatin accessibility}

Next, we examined the distribution of 5-hmU and chromatin accessibility around other available genomic features. We noticed that in many cases (although this is not an exclusive association) 5-hmU is enriched around the boundaries of dinoTADs while ATAC-seq shows decreased accessibility (Figure \ref{Fig2}A). We generalized this observation by evaluating the global ATAC-seq distribution around dinoTAD boundaries (Figure \ref{Fig2}C-E), and found that indeed ATAC-seq is globally depleted nearby these locations, while MeDIP-seq is enriched and 5-hmU chemical conversion rate is also elevated. Remarkably, the observed anti-correlation between chromatin accessibility and 5-hmU is specifically strong around dinoTAD boundaries. In contrast, we do not observe substantial inverse correlation between the two in the middle of dinoTAD domains (Figure \ref{Fig2}H). We do note these are not universal patterns, as a number of gene array boundaries do not show strong MeDIP enrichment and ATAC-seq depletion.

\begin{figure*}
\begin{center}
\includegraphics[width=17cm]{Fig2-TAD-boundaries.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf \hl{XXXX NOTE: MOST OF THESE PLOTS WILL BE REDONE XXX}. Inverse correlation between 5-hmU and chromatin accessibility and association with dinoTADs boundaries in the \textit{B. minutum} genome}. 
(A-B) Representative snapshots of the distribution of 5-hmU enrichment and decreased chromatin accessibility relative to dinoTAD boundaries.
(C) Depletion of ATAC-seq signal around dinoTAD boundaries.
(D) Enrichment of MeDIP-seq signal around dinoTAD boundaries.
(E) Increased 5-hmU chemical mapping conversion rate around dinoTAD boundaries.
(F-G) ATAC-seq and MeDIP-seq are generally anti-correlated (calculated for 5-kbp bins over the whole genome)
(H) ATAC-seq and MeDIP-seq are specifically strongly anti-correlated around dinoTAD boundaries.
}
\label{Fig2}
\end{figure*}

\subsection*{Association of 5-hmU and chromatin accessibility with repetitive elements}

Because of the previously noted enrichment and depletion of multimapping reads in 5-hmU and ATAC libraries, respectively, we then aimed to identify which, if any, repetitive elements might be specifically associated with 5-hmU and/or ATAC. We first examined the distribution of annotated repetitive elements (see Methods for details) around dinoTAD boundaries (Figures \ref{Fig3}A-C). We did not find a specific preference for dinoTAD boundaries for repeats as a whole, nor for any specific repeat family. However, Maverick DNA elements did exhibit strong enrichment around the edges of dinoTADs (Figures \ref{Fig3}C). Maverick does not account for all dinoTAD boundaries though -- while a majoriy of TAD boundaries show 5-hmU enrichment, Maverick elements are found in $\sim$11\% of them (Supplementary Figure \ref{FigS3}).

Global analysis of ATAC-seq depletion/enrichment over repetitive elements (Figures \ref{Fig3}D) showed that most repeats are depleted for accessibility, with Copia LTR and Maverick DNA elements most highly abundant in the gDNA control relative to ATAC-seq sample. The exception are CRE and RTE-RTE LINE elements.

MeDIP-seq data reveals a generally inverse picture -- most repeats are enriched in MeDIP libraries, with the exception of some LINE elements (Figures \ref{Fig3}E). Maverick DNA repeats are most strongly enriched for 5-hmU modifications.

These results point to increased protein occupancy and elevated 5-hmU levels over repetitive elements such as Maverick. We therefore asked whether we can specifically find nucleosomes corresponding to certain repeat classes. We utilized the nucleoATAC algorithm\cite{Schep2015} to identify positioned nucleosomes genome-wide in the \textit{B. minutum} genome (see Methods). We identified 30,107 low-resolution and 2,166 high-resolution putative positioned nucleosomes; these are not preferentially located to well defined general genomic features such as dinoTAD boundaries (Supplementary Figure \ref{FigS4}). V-plot analysis\cite{Henikoff2011} of the fragment distribution around positioned nucleosomes revealed an A-shaped structure, with a peak in the 120-160 bp range (this fragment length is higher for the smaller set of high-resolution nucleosomes; Supplementary Figure \ref{FigS4}), flanked by very short fragments. This is in contrast to what is observed in other eukaryotes such as yeast (Figures \ref{Fig3}F-G), where multiple nearby nucleosomes are visible. We interpret these structures are arising from a single positioned protective feature, quite possibly a histone-based nucleosome, without other strongly positioned nearby nucleosomes. We note that these observations are not explainable by mappability biases (i.e. only a single nucleosome is observed because all adjacent sequences are not mappable), as we carried out this analysis while allowing for multimapping reads and the center point of the putative positioned nucleosomes is in fact slightly less uniquely mappable than the flanks (Supplementary Figure \ref{FigS8}).

Strikingly, Maverick DNA are preferentially enriched for positioned nucleosomes, at $\sim$2$\times$ the genomic average (Figure \ref{Fig3}H).

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig3-repeats.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf \hl{XXXX NOTE: MOST OF THESE PLOTS WILL BE REDONE XXX}. Association of 5-hmU and chromatin accessibility with repetitive elements in the \textit{B. minutum} genome}. 
(A-C) Distribution of all repeats, individual repeat families, and some DNA elements around dinoTAD boundaries.
(D) ATAC-seq enrichment/depletion over repetitive elements.
(E) MeDIP-seq enrichment/depletion over repetitive elements.
(F-G) V-plot\cite{Henikoff2011} around positioned nucleosomes in S. cerevisiae (for comparison) and \textit{de novo} identified putative positioned nucleosomes in \textit{B. minutum}.
(H) Enrichment/depletion of positioned nucleosomes over repetitive elements.
}
\label{Fig3}
\end{figure*}

\section*{Discussion}

In this study, we provide the first global maps of the distribution of the 5-hmU modification and chromatin accessibility in a dinoflagellate species (\textit{B. minutum} in the Symbiodiniaceae clade). Our results point to a preferential enrichment for 5-hmU over certain classes of repetitive elements and also around the boundaries of the previously identified dinoTAD topologically associating domains that also coincide with the points of convergence of the long unidirectional arrays into which dinoflagellate genes are organized. In contrast, chromatin accessibility is depleted in those areas and is anti-correlated with high levels of 5-hmU. We do not observe strong accessibility peaks as seen in eukaryotes with conventional nucleosomal chromatin, nor do we see any preferential accessibility around transcription start sites, suggesting that most of the dinoflagellate genome is not protected by strings of nucleosomes and is generally physically accessible. We do identify several thousand putative positioned nucleosomes; however, these, if they are confirmed to be indeed histone-based nucleosomes, appear to be isolated and not parts of larger-order structures. An interesting trend that emerges is the association of elevated 5-hmU, decreased chromatin accessibility and increased frequency of positioned nucleosomes over certain repetitive elements, in particular Maverick DNA repeats, which are also enriched over dinoTAD boundaries. This is, however, by no means an absolute rule as not all dinoTAD boundaries are associated with such features.

Nevertheless, it is tempting to draw parallels between these initial observations in dinoflagellates and what is known in much more details for kinetoplastids\cite{Lukes2009}. The latter group shares the same general loss of transcriptional regulation as a primary mechanism for modulating gene regulation and the organization of the genome into long unidirectional gene arrays, and in kinetoplastids base J demarcates the regions between these arrays. Base J is synthesized through 5-hmU as an intermediate, and thus 5-hmU also is localized to the same regions of the genome in the cases where it has been measured (e.g. in \textit{Leishmania}\cite{Kawasaki2017,Kawasaki2018}). It is therefore plausible that it may play an analogous role in dinoflagellates, even though they have not evolved the further chemical elaboration that is base J. 

Such a proposition still leaves many unanswered questions. An obvious one is the mechanistic role of 5-hmU. In our previous work discovering the dinoTAD structures\cite{Marinov2021} , we showed that dinoTADs are dependent on transcriptional activity and disappear upon blocking transcription, i.e. they are most likely the product of extreme transcription induced DNA supercoiling. In the same time, 5-hmU has been reported to increase the flexibility of the DNA double helix\cite{Carson2016,Davies1988,Pasternack1996,Vu1999}, which naturally brings to mind a possible role for 5-hmU in alleviating the supercoiling stress under which dinoflagellate genomes appear to exist. However, the mechanistic details of such a link are not clear at the moment. 

Another open question is also why 5-hmU varies so much between different dinoflagellate species -- from 12\% to 68\% where it has been assayed -- and where it is located in the genome in the extreme cases. The suggested here preferential localization to dinoTAD boundaries is consistent with genome-wide rates of 5-hmU on the lower end of this spectrum (which is also what the available data for other Symbiodiniaceae points to\cite{Rae1978}). However, the \textit{B. minutum} genome is relatively small for a dinoflagellate -- only on the order of a 1 Gbp -- while other species have much larger and more repeat-rich genomes, which might be related to higher overall 5-hmU levels. In order to better understand its properties and functions, it will be important to assay 5-hmU in a wide variety of dinoflagellate species with diverse genomic characteristics. 

It will also be vital to have very high-quality genome assemblies to work with. For example, in our current work we have not been able to test whether 5-hmU is strongly associated with telomeres the way base J is in kinetoplastids, as the currently available \textit{B. minutum} assembly is of too poor quality to allow such analysis. 

The question of the precise role of histones, DVNPs and HLPs in dinoflagellate genomes also remains largely open. Here, we demonstrate decreased chromatin accessibility  over certain regions in the genome as well as the existence of nucleosome-like structures, which suggests the presence of nucleosomes along the genome. However, there is no substitute for the direct mapping of the location of histones using chromatin immunoprecipitation (ChIP). This is unfortunately still precluded due to the extreme sequence divergence of dinoflagellate histone proteins\cite{Marinov2015}, which means that existing anti-histone antibodies are not reliable reagents for carrying out such experiments in dinoflagellates. Establishing the absolute levels of protection/occupancy in dinoflagellate genomes, through the application of methylation-based (especially long read-based) and enzymatic approaches\cite{Oberbeckmann2019}  will also be highly valuable.

\section*{Methods}

\subsection*{\textit{B. minutum} cell culture}

The clonal axenic \textit{Symbiodinium}/\textit{Breviolum minutum} strain SSB01 was used in all experiments. Stock cultures were grown as previously described\cite{Xiang2013,Xiang2015} in Daigo's IMK medium for marine microalgae (Wako Pure Chemicals) supplemented with casein hydrolysate (IMK+Cas) at 27$\,^{\circ}\mathrm{C}$ at a light intensity of 10 $\mu$mol photons m$^{-2}$ s$^{-1}$ from Philips ALTO II 25-W bulbs on a 12-h-light:12-h-dark cycle. The medium was prepared in artificial seawater (ASW).

\subsection*{Genomic DNA isolation} 

\textit{B. minutum} genomic DNA was isolated as previously described\cite{Xiang2013}. Briefly, cells were centrifuged at 1,000 $g$ for 5 minutes, then resuspended in 500 $\mu$L 1$\times$ Cell Lysis Buffer (prepared by mixing equal volumes of 2$\times$ Cell Lysis Buffer -- 2\% SDS, 400 mM NaCl, 40 mM EDTA, 100 mM Tris-HCl, pH 8.0 -- and H$_2$O) and vortexed. The lysed cells were mixed with an equal 500 $\mu$L volume Phenol:Chloroform:Isoamyl alcohol (25:24:1), and mixed well by inverting a few times. The phases were centrifugation at 13,000 $g$ for 5 minutes, then the top phase was transferred to a new tube and treated with 4 $\mu$L Ribonuclease A (20 mg/mL) by incubating for 30 minutes at 37$\,^{\circ}\mathrm{C}$. 

DNA was purified by adding an equal volume Phenol:Chloroform:Isoamyl alcohol (25:24:1), mixing well and centrifuging at 13,000 $g$ for 5 minutes, then transferring the top layer to a new tube, to which Phenol:Chloroform:Isoamyl alcohol (25:24:1) was added again, and the centrifugation and top phase isolation was repeated. Then 2.5$\times$ volumes of 100\% EtOH were added and the mixture was incubated on ice for 30 minutes or at -20$\,^{\circ}\mathrm{C}$ overnight. The solution was then centrifuged at 13,000 $g$ at room temperature for 20 minutes, the pellet was washed with 70\% EtOH, dried on air and resuspended in 50 $\mu$L H$_2$O. 

% \subsection*{ATAC-see experiments} 

% \hl{XXX TO BE DONE}

\subsection*{ATAC-seq experiments} 

ATAC-seq experiments were performed following the omniATAC protocol\cite{omniATAC}. 

Briefly, $\sim$100K \textit{B. minutum} cells were centrifuged at 1,000 $g$, then resuspended in 500 $\mu$L 1$\times$ PBS and centrifuged again. Cells were then resuspended in 50 $\mu$L ATAC-RSB-Lysis buffer (10 mM Tris-HCl pH 7.4, 10 mM NaCl, 3 mM MgCl$_2$, 0.1\% IGEPAL CA-630, 0.1\% Tween-20, 0.01\% Digitonin) and incubated on ice for 3 minutes. Subsequently 1 mL ATAC-RSB-Wash buffer (10 mM Tris-HCl pH 7.4, 10 mM NaCl, 3 mM MgCl$_2$, 0.1\% Tween-20, 0.01\% Digitonin) were added, the tubes were inverted several times, and nuclei were centrifuged at 500 $g$ for 5 min at $4\,^{\circ}\mathrm{C}$. 

Transposition was carried out by resuspending nuclei in a mix of 25 $\mu$L 2$\times$ TD buffer (20 mM Tris-HCl pH 7.6, 10 mM MgCl$_2$, 20\% Dimethyl Formamide), 2.5 $\mu$L transposase (custom produced) and 22.5 $\mu$L nuclease-free H$_2$O, and incubating at 37$\,^{\circ}\mathrm{C}$ for 30 min in a Thermomixer at 1000 RPM. 

Transposed DNA was isolated using the MinElute PCR Purification Kit (Qiagen Cat\# 28004/28006), and PCR amplified as previously described\cite{omniATAC}. Libraries were purified using the MinElute kit, then sequenced on a Illumina NextSeq 550 instrument as 2$\times$36mers or  as 2$\times$75mers. 

\subsection*{ATAC-seq control experiments}

Genomic DNA controls for ATAC-seq were generated by transposing purified gDNA. Briefly, 100 ng of gDNA were mixed with 2 $\mu$L Tn5, 25 $\mu$L 2$\times $TD buffer and H$_2$O for a total volume of 50 $\mu$L, then incubated at 55$\,^{\circ}\mathrm{C}$ for 5 minutes. The reaction was stopped by immediately proceeding with DNA isolation using the MinElute kit. Libraries were generated as described above for ATAC-seq.

\subsection*{Genome assemblies}

Datasets were processed against either the original \textit{B. minutum} assembly\cite{Shoguchi2013} or against the Hi-C scaffolded assembly for \textit{B. minutum} previously described\cite{Marinov2021}, which is based on the original fragmented assembly for this species\cite{Shoguchi2013} and scaffolded into chromosome-level contigs using Hi-C data following established protocols\cite{Dudchenko2017}. 

\subsection*{General analysis procedures}

Browser tracks generation, fragment length estimation, and other analyses were carried out using custom-written Python scripts (\burl{https://github.com/georgimarinov/GeorgiScripts}). 

\subsection*{Mappability track generation}

Mappability tracks were generated as by tiling the whole genome with reads of length $RL$ starting at each position. These reads were the mapped back to the genome using the same settings used for processing real datasets. Average mappability over each position was calculated as the ratio $RC/RL$ between its read coverage $RC$ and the read length $RL$.

\subsection*{ATAC-seq data processing}

Demultipexed FASTQ files were mapped as 2$\times$36mers using Bowtie\cite{Langmead2009} with the following settings: \verb|-v 2| \verb|-k 2| \verb|-m 1| \verb|--best| \verb|--strata| \verb|-X 1000|. Duplicate reads were removed using \verb|picard|\verb|-tools| (version 1.99). This mapping generated a set of uniquely mapping alignments only.

For the purpose of the analysis of multimappers, alignments were generated with unlimited alignment multiplicity with the following settings: \verb|-v 2| \verb|-a| \verb|--best| \verb|--strata| \verb|-X 1000|. 

Normalization of multimappers was performed in two ways. 

First, the previously described\cite{Marinov2015b} method of dividing each alignment by its read multiplicity was applied, i.e:

\begin{equation}\label{multireads}
S_{c,i} = \cfrac{\displaystyle \sum_{R \in R_{c,i}}{\cfrac{1}{NH_R}}}{\cfrac{|R|}{10^6}}
\end{equation}

Where $S_{c,i}$ is the signal score for position $i$ on chromosome $c$ (in RPM, or Read Per Million mapped reads units), $|R|$ is the total number of mapped reads, $|R_{c,i}|$ is the number of reads covering position $i$ on chromosome $c$, and $NH_R$ is the number of locations in the genome a read maps to.

Second, \hl{XXXX}

Peak calling was carried out using MACS2\cite{MACS2}, with the gDNA library as a control, and with the following settings: \verb|-g| \verb|569785352| \verb|-f| \verb|BAMPE|. Differentially accessible regions were identified using DESeq2\cite{DESeq2}.

\subsection*{Analysis of positioned nucleosomes}

The analysis of positioned nucleosomes was carried out using NucleoATAC\cite{Schep2015}. \hl{XXXXX DETAILS XXXX }

% \subsection*{NOMe-seq experiments}

% \hl{XXX TO BE DONE}

\subsection*{MeDIP-seq experiments}

To prepare inputs for MeDIP-seq experiments, gDNA was first sonicated using a Qsonica S-4000 with a 1/16'' tip for 3 minutes, with 10 second pulses at intensity 3.5, and 20 seconds rest between pulses. The IP procedure was adapted from the protocol for ChIP-seq as previously described\cite{Marinov2017a}. 

For each reaction, 100 $\mu$L of Protein A Dynabeads (ThermoFisher Cat \# 10002D) were washed 3 times with a 5 mg/mL BSA solution. Beads were then resuspended in 1 mL BSA solution and 5 $\mu$L of $\alpha$-5-hmU antibody (Abcam Cat \# ab19735) were added. Coupling of antibodies to beads was carried out overnight on a rotator at $4\,^{\circ}\mathrm{C}$. Beads were again washed 3 times with BSA solution and resuspended in 100 $\mu$L of BSA solution.

Sheared genomic DNA ($\sim$1 $\mu$g 1:1 mix of \textit{B. minutum} and \textit{Homo sapiens}) was end repaired and adapters were ligated to it following the procedure of the NEBNext Ultra II DNA Library Prep Kit for Illumina (NEB, E7645S), purified using AMPure XP beads and eluted in 50 $\mu$L of H$_2$O, and then denatured at 98$\,^{\circ}\mathrm{C}$ for 10 minutes. DNA was then immediately placed on ice, resuspended in 850 $\mu$L RIPA buffer (1$\times$ PBS, 1\% IGEPAL, 0.5\% Sodium Deoxycholate, 0.1\% SDS, Roche Protease Inhibitor Cocktail) and added to the beads, then incubated overnight on a rotator at $4\,^{\circ}\mathrm{C}$. 

Beads were washed 5 times with LiCl buffer (10 mM Tris-HCl pH 7.5, 500 mM LiCl, 1\% NP-40/IGEPAL, 0.5\% Sodium Deoxycholate) by incubating for 10 minutes at $4\,^{\circ}\mathrm{C}$ on a rotator, then rinsed once with 1$\times$ TE buffer. Beads were then resuspended in 200 $\mu$L IP Elution Buffer (1\% SDS, 0.1 M NaHCO$_3$) and incubated at $65\,^{\circ}\mathrm{C}$ in a Thermomixer (Eppendorf) with interval mixing to dissociate antibodies. Beads were separated from the DNA solution by centrifugation, and DNA was purified using the MinElute kit. 

Library generation was completed by carrying out PCR following the rest of the steps of the NEBNext Ultra II DNA Library Prep Kit protocol, using 15 cycles of amplification.  Final libraries were purified using AMPure XP beads.

Several control libraries were prepared -- ``Input'' from the gDNA that was used as input to the immunoprecipitation, ``Depleted'' from the supernatant from the first bead separation after the incubation of DNA with beads, and ``IgG'', generated from a parallel immunoprecipitation reaction that used only Protein A beads (without a primary antibody)

\subsection*{MeDIP-seq data processing}

MeDIP-seq libraries processing was carried out in the same way as that of ATAC-seq datasets.

\subsection*{5-hmU chemical mapping experiments}

Chemical mapping of 5-hmU as carried out following the previously described by Kawasaki et al. chemical conversion method\cite{Kawasaki2018}. 

Briefly, sheared genomic DNA was used as input and end prep and adapter ligation were carried out using the NEBNext Ultra II DNA Library Prep Kit. After the ligation step, DNA was purified using AMPure XP beads and eluted in 50 $\mu$L of H$_2$O. DNA denaturation was performed by adding NaOH to a final concentration of 0.05M and incubating at 37$\,^{\circ}\mathrm{C}$ for 30 minutes. Oxidation was carried out by adding 2 $\mu$L of KRuO$_4$ solution (15 mM in 0.05 M NaOH) and incubating for 30 minutes at room temperature. Oxidized DNA was purified using AMPure XP beads and extension was carried out by mixing 13.5 $\mu$L DNA, 1.6 $\mu$L 100 mM MgSO$_4$, 2 $\mu$L NEB Index Primer, 2 $\mu$L 10$\times$ ThermoPol Reaction Buffer (NEB), 0.5 $\mu$L 10 mM dNTP mix, and 0.4 $\mu$L Bst DNA Polymerase, Large Fragment (NEB), then incubating for 1 hour at 37$\,^{\circ}\mathrm{C}$. PCR amplification was carried out using the NEB Ultra DNA Library Prep Kit, with 12 cycles of PCR. Final libraries were purified using AMPure XP beads.

\subsection*{Processing of 5-hmU chemical mapping datasets}

The \verb|slamdunk| package\cite{Neumann2019} (\burl{https://t-neumann.github.io/slamdunk/}), which was originally developed for the analysis of SLAM-seq\cite{Herzog2017} datasets (the SLAM-seq protocol also generates T $\rightarrow$ C conversions) was adapted to estimate 5-hmU conversion levels. 

First, the genome was tiled into 500-bp bins starting every 100 bp. Second, sequencing reads were trimmed of adaptors using Trim Galore, and used as input to \verb|slamdunk| together with the genome tiling with the following settings: \verb|--max-read-length| \verb|75| \verb|-5| \verb|9| \verb|-n 1000000| \verb|-m| \verb|--skip-sam|.

\subsection*{Repeat annotation}

\hl{XXX DETAILS XXX}

\subsection*{Analysis of ATAC-seq and MeDIP-seq data in repeat space}

\hl{XXX DETAILS XXX, normalization}

\subsection*{Yeast cell culture}

The MS47 \textit{S. cerevisiae} strain was used for all experiments. Cells were grown in YPD media (30$\,^{\circ}\mathrm{C}$) to OD$\sim$0.8 before collection. 

\hl{XXX \textit{Candida} details XXX}

\subsection*{Exogenous DVNP expression}

\hl{XXX DESCRIBE XXX}

\subsection*{Yeast SMF experiments}

Yeast SMF experiments were carried out as previously described\cite{Shipony2020,Marinov2023}

A 1:1 mixture of \textit{S. cerevisiae} cells expression DVNPs and \textit{Candida albicans} cells (used as a control for normalization, as previously described\cite{Swaffer2021}) amounting to a total of  $2.5 \times 10^8$ cells was used as input. Cells in log phase (OD$_{660} \leq 1.0$) were first centrifuged at 13,000 rpm for 1 minute, then washed with 100 $\mu$L Sorbitol Buffer(1.4 M Sorbitol, 40 mM HEPES-KOH pH 7.5, 0.5 mM MgCl$_2$), and centrifuged again at 13,000 rpm for 1 minute. Cells were then spheroplasted by resuspending in 200 $\mu$L Sorbitol Buffer with DTT added at a final concentration of 10 mM and 0.5 mg/mL 100T Zymolase, followed by incubating for 5 minutes at $30\,^{\circ}\mathrm{C}$ at 300 rpm in a Thermomixer. The pellet was centrifuged for 2 minutes at 5,000 rpm, washed in 100 $\mu$L Sorbitol Buffer, and centrifuged again at 5,000 rpm for 2 minutes.

Cells were then resuspended in 100 $\mu$L ice-cold Nuclei Lysis Buffer (10 mM Tris pH 7.4, 10 mM NaCl, 3 mM MgCl$_2$, 0.1 mM EDTA, 0.5\% NP-40) and incubated on ice for 10 minutes. Nuclei were then centrifuged at 5000 rpm for 5 min at $4\,^{\circ}\mathrm{C}$, resuspended in 100 $\mu$L cold Nuclei Wash Buffer (10 mM Tris pH 7.4, 10 mM NaCl, 3 mM MgCl$_2$, 0.1 mM EDTA), and centrifuged again at 5,000 rpm for 5 min at $4\,^{\circ}\mathrm{C}$. Finally, nuclei were resuspended in 100 $\mu$L M.CviPI Reaction Buffer (50 mM Tris-HCl pH 8.5, 50 mM NaCl, 10 mM DTT). 

Nuclei were then first treated with M.CviPI (GpC methyltransferase) by adding 200 U of M.CviPI (NEB), SAM at 0.6 mM and sucrose at 300 mM, and incubating at $30\,^{\circ}\mathrm{C}$ for 7.5 min. After this incubation, 128 pmol SAM and another 100 U of enzymes were added, and a further incubation at $30\,^{\circ}\mathrm{C}$ for 7.5 min was carried out. Immediately after, M.SssI treatment (CpG methyltransferase) followed, by adding 60 U of M.SssI (NEB), 128 pmol SAM, MgCl$_2$ at 10 mM and incubation at $30\,^{\circ}\mathrm{C}$ for 7.5 min. 

The reaction was stopped by adding an equal volume of Stop Buffer (20 mM Tris-HCl pH 8.5, 600 mM NaCl, 1\% SDS, 10 mM EDTA). 

HMW DNA was isolated using the MagAttract HMW DNA Kit (Qiagen; cat $\#$ 67563) following the manufacturer's instructions.

Enzymatically labeled DNA was then sheared on a Covaris E220 and converted into sequencing libraries following the EM-seq protocol, using the NEBNext Enzymatic Methyl-seq Kit (NEB, Cat \# E7120L). 

\subsection*{Yeast SMF data processing}

Adapters were trimmed from reads using Trimmomatic\cite{trimmomatic} (version 0.36). Trimmed reads were aligned against a combined \textit{S. cerevisiae} \textit{sacCer3} plus \textit{Candida albicans} \verb|C_glabrata_CBS138| genome index using \verb|bwa-meth| with default settings. Duplicate reads were removed using \verb|picard|\verb|-tools| (version 1.99). Methylation calls were extracted using \verb|MethylDackel| (\burl{https://github.com/dpryan79/MethylDackel}). Additional analyses were carried out using custom-written Python scripts (\burl{https://github.com/georgimarinov/GeorgiScripts}). 

Chemically mapped nucleosome positions in \textit{S. cerevisiae} were obtained from Brogaard et al. 2012 \cite{Brogaard2012} as previously described\cite{Shipony2020}.

\subsection*{Yeast ATAC-seq experiments}

Yeast ATAC-seq experiments were carried out as previously described\cite{Shipony2020,Marinov2022}. 

Briefly, ATAC-seq was carried out on the same nuclei isolated for SMF as described above (before resuspension in M.CviPI Reaction Buffer), by resuspending nuclei with 25 $\mu$L 2$\times$ TD buffer (20 mM Tris-HCl pH 7.6, 10 mM MgCl$_2$, 20\% Dimethyl Formamide), 2.5 $\mu$L transposase (custom produced) and 22.5 $\mu$L nuclease-free H$_2$O, and incubating at $37\,^{\circ}\mathrm{C}$ for 30 min in a Thermomixer at 1000 RPM. Transposed DNA was isolated using the DNA Clean \& Concentrator Kit (Zymo, cat $\#$ D4014) and PCR amplified as described before\cite{omniATAC}. Libraries were then sequenced on a Illumina NextSeq instrument as 2$\times$36mers or  as 2$\times$75mers. 

\subsection*{ATAC-seq data processing}

FASTQ files were mapped against a combined \textit{S. cerevisiae} \textit{sacCer3} plus \textit{Candida albicans} \verb|C_glabrata_CBS138| genome index as 2$\times$36mers using Bowtie\cite{Langmead2009} with the following settings: \verb|-v 2| \verb|-k 2| \verb|-m 1| \verb|--best| \verb|--strata|. Duplicate reads were removed using \verb|picard|\verb|-tools| (version 1.99). Additional analysis was carried out as previously described\cite{Marinov2020ATAC}.

\section*{Author contributions}

G.K.M. conceptualized the study, and carried out ATAC-seq, MeDIP and 5-hmU chemical mapping experiments. X.C. analyzed data. M.P.S. carried yeast DVNP expression experiments. T.X. carried out cell culture and DNA isolation. A.R.G, A.K. and W.J.G. supervised the study. G.K.M. and X.C. wrote the manuscript with input from all authors.

\section*{Acknowledgements}

This work was supported by NIH grants (P50HG007735, RO1 HG008140, U19AI057266 and UM1HG009442 to W.J.G., 1UM1HG009436 to W.J.G. and A.K., 1DP2OD022870-01 and 1U01HG009431 to A.K.), the Rita Allen Foundation (to W.J.G.), the Baxter Foundation Faculty Scholar Grant, and the Human Frontiers Science Program grant RGY006S (to W.J.G). W.J.G. is a Chan Zuckerberg Biohub investigator and acknowledges grants 2017-174468 and 2018-182817 from the Chan Zuckerberg Initiative. Fellowship support provided by the Stanford School of Medicine Dean's Fellowship (G.K.M.). This work is also supported by NSF-IOS EDGE Award 1645164 to A.R.G. and Carnegie Venture grant 10907 (to T.X. and G.K.M.).

The authors would like to thank Alexandro Trevino for supplying the $\alpha$-5-hmU antibody, Nicholas Irwin and Patrick Keeling for providing the construct for expressing  \textit{Hematodinium} DVNP.5, as well as members of the Greenleaf, Kundaje, Pringle and Grossman laboratories for helpful discussion and suggestions regarding this work.

\section*{Data Availability}

Data associated with this manuscript have been submitted to GEO under accession number \hl{XXXX}

\section*{Code Availability}

Custom code used to process the data is available at \burl{https://github.com/georgimarinov/GeorgiScripts} and \hl{XXXX}

\section*{Competing Interests}

The authors declare no competing interests.

\begin{thebibliography}{100}

% \section*{References}

\input{references}

\end{thebibliography}

\end{multicols}

\clearpage

\setcounter{table}{0}
\renewcommand{\tablename}{Supplementary Table}
\setcounter{figure}{0}
\renewcommand{\figurename}{Supplementary Figure}

\setcounter{page}{1}
\renewcommand\thepage{{SM }\arabic{page}}

\begin{center}
% {\LARGE \textbf{\begin{spacing}{1.1}XXXX. \\ Supplementary Materials\end{spacing} }}
{\LARGE \textbf{Supplementary Materials}}
\end{center}

% \section*{Supplementary Tables}

\section*{Supplementary Figures}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{FigS6-ATAC-browser-tracks.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Representative genome browser view of ATAC-seq and gDNA control signal in the \textit{B. minutum} genome}. 
}
\label{FigS6}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{FigS7-ATAC-browser-tracks.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Representative genome browser view of ATAC-seq and gDNA control signal in the \textit{B. minutum} genome}. 
}
\label{FigS7}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{FigS2-DVNP-ATAC1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Effects of exogenous expression dinoflagellate DVNPs on chromatin accessibility in the yeast \textit{S. cerevisiae}}. 
(A-B) ATAC-seq profiles of \textit{S. cerevisiae} expressing \textit{B. minutum} DVNP symbB.v1.2.006931 and \textit{Hematodinium} sp. DVNP.12 and control samples.
(C) SMF profiles (corrected using average SMF methylation from the \textit{Candida} internal control) over \textit{S. cerevisiae} TSSs in \textit{S. cerevisiae} expressing \textit{B. minutum} DVNP symbB.v1.2.006931 and \textit{Hematodinium} sp. DVNP.12 and control samples.
(D) SMF profiles (corrected using average SMF methylation from the \textit{Candida} internal control) over positioned \textit{S. cerevisiae} nucleosomes in \textit{S. cerevisiae} expressing \textit{B. minutum} DVNP symbB.v1.2.006931 and \textit{Hematodinium} sp. DVNP.12 and control samples.
}
\label{FigS2}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=8cm]{FigS5-K562-vs-dino-ATAC-enrichment.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Relative degree of ATAC-seq enrichment in \textit{B. minutum} versus a representative mammalian genome sample}. Shown is the $log_2$(fold change) ratio of ATAC-seq signal versus a negative control for a representative human ATAC-seq sample (K562 cell line from the ENCODE Project Consortium\cite{ENCODE2020}; dataset ID ENCFF512VEZ was used for ATAC and dataset ID ENCFF285UKJ -- a whole genome bisulfite sequencing library -- as a negative control, over peaks from dataset ID ENCFF695IGF). A separately sequenced gDNA control was generated for the \textit{B. minutum} ATAC.
}
\label{FigS5}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{FigS1-DVNP-ATAC.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Effects of exogenous expression dinoflagellate DVNPs on chromatin accessibility in the yeast \textit{S. cerevisiae}}. 
ATAC-seq profiles of \textit{S. cerevisiae} expressing \textit{Hematodinium} sp. DVNP.5 (from Irwin et al. 2018\cite{Irwin2018}) and a vehicle control, as well as additional replicates for \textit{B. minutum} DVNP symbB.v1.2.006931 and \textit{Hematodinium} sp. DVNP.12 and control samples. ``OFF'' and ``ON'' refer to cells in which the expression of \textit{Hematodinium} sp. DVNP.5 is induced or not.
}
\label{FigS1}
\end{figure*}

\clearpage

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=10cm]{FigS3-Venn.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Overlap between dinoTAD boundaries, regions of low accessibility, and regions of high 5-hmU}. 
}
\label{FigS3}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=10cm]{FigS4-nucleosomes-TAD.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Positioned nucleosomes as a whole are not strongly enriched around dinoTAD boundaries}. 
}
\label{FigS4}
\end{figure*}

\clearpage


\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=16cm]{FigS8-nucleosomes.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Properties of putative positioned nucleosomes in the \textit{B. minutum} genome}. 
(A) V-plot of low-resolution positioned nucleosomes ($n$=30,107)
(B) V-plot of high-resolution positioned nucleosomes ($n$=2,166)
(C) Fragment distribution over low-resolution positioned nucleosomes
(D) Fragment distribution over high-resolution positioned nucleosomes
(E) Average mappability (for reads of length 75 bp) over positioned nucleosomes.
}
\label{FigS8}
\end{figure*}

\end{document}
