\documentclass[10pt]{article}
\usepackage[hmargin=1.5cm,top=2cm,bottom=2cm]{geometry}
\usepackage{multicol}
\setlength\columnsep{15pt}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{array}
\usepackage{booktabs}
\usepackage{tabularx}
\usepackage[auth-sc]{authblk}
\usepackage{longtable}
\usepackage{multirow}
\usepackage{hyperref}
\usepackage{enumerate}
\usepackage[labelfont=bf]{caption}
\usepackage[usenames,dvipsnames]{xcolor}
\usepackage{mdframed}
% graphicx (not plain graphics) provides the key-value form
% \includegraphics[width=...]{...} used by every figure in this document.
\usepackage{graphicx}
\usepackage{multirow}
\usepackage{rotating}
\usepackage{array}
\usepackage{capt-of}
\usepackage{lscape}
\usepackage{caption}
\usepackage{breakurl}
\usepackage{todonotes}
\usepackage{hanging}
\usepackage{pagecolor}
\usepackage[final]{pdfpages}
\usepackage[leftFloats,CaptionAfterwards]{fltpage}
\usepackage[numbers,super,sort&compress]{natbib}
\setlength{\bibsep}{0pt plus 0.3ex}
\usepackage{abstract}
\usepackage{enumitem}
\usepackage{soul}
\usepackage{titlesec}
% Heading formats (titlesec). Each level is labelled with its own counter
% followed by a period, separated from the title text by 0.4em.
\titleformat{\section}[block]{\large\bfseries\filcenter}{\thesection.}{0.4em}{}
\titleformat{\subsection}[block]{\normalsize\sc\bfseries\filcenter}{\thesubsection.}{0.4em}{}
% The subsubsection label must use \thesubsubsection; using \thesubsection
% here would print the parent subsection number on every subsubsection.
\titleformat{\subsubsection}[block]{\normalsize\sc\itshape\filright}{\thesubsubsection.}{0.4em}{}
% Number headings down to level 5.
\setcounter{secnumdepth}{5}

% Footnote layout: footmisc with the hang option and a zero footnote margin.
\usepackage[hang]{footmisc}
\setlength{\footnotemargin}{0em}
% Space between the body text and the footnote block.
\setlength{\skip\footins}{0.75cm}
% Colour used as the fill of the page-number footer node.
\definecolor{linen}{rgb}{0.98, 0.94, 0.9}

% Page header/footer setup (fancyhdr): empty header with no rule, and a footer
% that prints the page number inside a linen-filled TikZ node.
\usepackage{fancyhdr}
\pagestyle{fancy}
% Lengths used below as footer offsets.
% NOTE(review): the infix `+` in these \setlength arguments requires the calc
% package, which is not loaded explicitly in this preamble; presumably it is
% pulled in by another package -- confirm.
\newlength{\oddmarginwidth}
\setlength{\oddmarginwidth}{1in+\hoffset+\oddsidemargin}
\newlength{\evenmarginwidth}
\setlength{\evenmarginwidth}{\evensidemargin+1in}
\fancyfootoffset[LO,RE]{\oddmarginwidth}
\fancyfootoffset[LE,RO]{\evenmarginwidth}
% Clear all header fields and remove the header rule.
\fancyhead{}
\renewcommand{\headrulewidth}{0pt}
% Clear all footer fields before defining the page-number footer.
\fancyfoot{} 
% NOTE(review): in fancyhdr's \lfoot[<even>]{<odd>} syntax the optional
% argument is the even-page text, not a position selector, so `[C]` here is
% literal text for even pages -- confirm whether \fancyfoot[C]{...} was meant.
% NOTE(review): \tikz needs TikZ, which is not loaded explicitly in this
% preamble; presumably it comes in via another package -- confirm.
\lfoot[C]{\tikz{\node[black,outer sep=-20pt,inner sep=5pt,fill=linen,text width=\dimexpr\paperwidth\relax,align=center] at (0,0) {\thepage};}}
% \lhead[LO,RE]{\tikz{\node[black,outer sep=0pt,inner sep=5pt,fill=white,text width=\dimexpr\textwidth-1.5cm\relax,align=left] at (0,0) {\textbf{Marinov, Chen et al.}};}}
% \lhead[LE,RO]{\tikz{\node[black,outer sep=0pt,inner sep=5pt,fill=white,text width=\dimexpr\textwidth-1.5cm\relax,align=left] at (0,0) {\textbf{5-hmU and chromatin accessibility distribution in dinoflagellates}};}}
\setlength{\footskip}{35pt}
% Redefine the plain page style in terms of fancy, so pages that request
% `plain` get the same header/footer as the rest of the document.
\fancypagestyle{plain}{\pagestyle{fancy}}

% Footnote separator: a 1.5pt-thick rule spanning the full text width,
% preceded by a -3pt kern and followed by a 2.6pt kern.
\makeatletter
\renewcommand{\footnoterule}{%
  \kern -3\p@
  \hrule \@width\textwidth \@height 1.5pt
  \kern 2.6\p@
}
\makeatother

% Bibliography labels: print the label followed by a period ("1." rather
% than "[1]"), and print nothing at all when the label is empty.
\makeatletter
\renewcommand*{\@biblabel}[1]{\@ifnotempty{#1}{#1.}}
\makeatother

% \filllastline{<text>}: set \leftskip, \rightskip and \parfillskip to 0pt and
% then typeset <text>. The assignments are not wrapped in a group, so they
% remain in effect until the enclosing group ends.
% Line ends inside the definition are commented out so the macro does not
% insert stray spaces when it is used in horizontal mode.
\newcommand{\filllastline}[1]{%
  \setlength\leftskip{0pt}%
  \setlength\rightskip{0pt}%
  \setlength\parfillskip{0pt}%
  #1}

% Figure: a non-floating container. It starts a new paragraph, adds a
% \medskip above and below, and sets its contents in an unindented minipage
% as wide as the current line.
\newenvironment{Figure}{%
  \par\medskip\noindent
  \minipage{\linewidth}%
}{%
  \endminipage
  \par\medskip
}


\title{\bf An updated compendium and reevaluation of the evidence for nuclear transcription factor occupancy over the mitochondrial genome}
\renewcommand\Authfont{\scshape\normalsize}
\author[1,$\#$]{Georgi K. Marinov}
\author[1,2]{Vivekanandan Ramalingam}
\author[1,3,4,5]{William J. Greenleaf}
\author[1,2]{Anshul Kundaje}
\renewcommand\Affilfont{\itshape\normalsize}
\affil[1]{Department of Genetics, Stanford University, Stanford, CA 94305, USA}
\affil[2]{Department of Computer Science, Stanford University, Stanford, CA 94305, USA}
\affil[3]{Center for Personal Dynamic Regulomes, Stanford University, Stanford, California 94305, USA}
\affil[4]{Department of Applied Physics, Stanford University, Stanford, California 94305, USA}
\affil[5]{Chan Zuckerberg Biohub, San Francisco, California, USA}
\affil[$\#$]{Corresponding author}
% \affil[*]{These authors contributed equally}
\date{}

\begin{document}
\maketitle

% \centerline{}
% \centerline{}
\begin{abstract}

\noindent {\normalsize \textbf{In most eukaryotes, mitochondrial organelles contain their own genome, usually circular, which is the remnant of the genome of the ancestral bacterial endosymbiont that gave rise to modern mitochondria. Mitochondrial genomes are dramatically reduced in their gene content due to the process of endosymbiotic gene transfer to the nucleus; as a result, most mitochondrial proteins are encoded in the nucleus and imported into mitochondria. This includes the components of the dedicated mitochondrial transcription and replication systems and regulatory factors, which are entirely distinct from the information processing systems in the nucleus. However, since the 1990s several nuclear transcription factors have been reported to act in mitochondria, and previously we identified 8 human and 3 mouse transcription factors (TFs) with strong localized enrichment over the mitochondrial genome using ChIP-seq (\textbf{Ch}romatin \textbf{I}mmuno\textbf{p}recipitation) datasets from the second phase of the ENCODE (\textbf{Enc}yclopedia \textbf{o}f \textbf{D}NA \textbf{E}lements) Project Consortium. Here, we analyze the ENCODE compendium of TF ChIP-seq datasets, which has greatly expanded in the intervening decade (a total of 6,153 ChIP experiments for 942 proteins, of which 763 are sequence-specific TFs), combined with interpretative deep learning models of TF occupancy to create a comprehensive compendium of nuclear TFs that show evidence of association with the mitochondrial genome. We find some evidence for chrM occupancy for 50 nuclear TFs and two other proteins. However, we also observe that in cases where the same TF has been assayed with multiple antibodies and ChIP protocols, evidence for its chrM occupancy is not always reproducible. In the light of these findings, we discuss the evidential criteria for establishing chrM occupancy and reevaluate the overall compendium of putative mitochondrial-acting nuclear TFs.
}
}
\centerline{}
\centerline{}
\end{abstract}

\begin{multicols}{2}


\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Global assessment of the evidence for association of human nuclear TFs with the mitochondrial genome}.
(A) Summary of the known human TFs and available ENCODE TF ChIP datasets. The TF classification of human TFs of Lambert et al. 2018\cite{Lambert2018} was followed.
(B) Hierarchical clustering of ChIP-seq profiles over the mitochondrial genome for 6,153 TF ChIP-seq datasets. Datasets that show evidence for non-artefactual association with mitochondrial DNA are highlighted at the bottom.
} 
\label{Fig1}
\end{figure*}


\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig3-ATF2.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ATF2 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR869IUD; antibody: Bethyl Labs A301-649A);
(B) BPNet predictions over chrM;
(C) H1-hESC ChIP-seq (ENCODE ID ENCSR000BQU; antibody: Santa Cruz Biotech sc-81188, Lot ID H0609);
(D) HEK293 CETCH-seq (ENCODE ID ENCSR217HTK);
% (A) GM12878 ChIP-seq (ENCODE ID ENCSR961PPA; antibody );
(E) HepG2 ChIP-seq (ENCODE ID ENCSR047BUZ; antibody: Bethyl Labs A301-649A);
(F) HepG2 CETCH-seq (ENCODE ID ENCSR908HWZ).
} 
\label{Fig3}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig4-ATF3.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ATF3 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over the mitochondrial genome.
(A) K562 ChIP-seq (ENCODE ID ENCSR028UIU; antibody: Active Motif 2/61715);
(B) BPNet predictions over chrM;
(C) H1-hESC ChIP-seq  (ENCODE ID ENCSR000BKC; antibody: Santa Cruz Biotech sc-188, Lot ID J2209);
(D) HCT116 ChIP-seq  (ENCODE ID ENCSR000BUG; antibody: Santa Cruz Biotech sc-188, Lot ID J2209);
(E) K562 ChIP-seq  (ENCODE ID ENCSR000BNU; antibody: Santa Cruz Biotech sc-188, Lot ID J2209);
(F) HepG2 ChIP-seq  (ENCODE ID ENCSR000BKE; antibody: Santa Cruz Biotech sc-188, Lot ID J2209);
(G) A549 0.02\% EtOH 1h ChIP-seq  (ENCODE ID ENCSR000BPS; antibody: Santa Cruz Biotech sc-188, Lot ID J2209);
(H) GM12878 ChIP-seq  (ENCODE ID ENCSR000BJY; antibody: Santa Cruz Biotech sc-188, Lot ID J2209);
(I) liver ChIP-seq  (ENCODE ID ENCSR480LIS; antibody: Santa Cruz Biotech sc-188, Lot ID J2209);
(J) HepG2 CETCH-seq  (ENCODE ID ENCSR402ZCY).
} 
\label{Fig4}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig5-ATF4.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ATF4 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR145TSJ; antibody: Cell Signaling 11815S);
(B) BPNet predictions over chrM;
(C) HepG2 CETCH-seq (ENCODE ID ENCSR288ZFV).
} 
\label{Fig5}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig6-ATF7.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ATF7 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) MCF-7 ChIP-seq (ENCODE ID ENCSR866QPZ; antibody: Sigma HPA003384, Lot ID R04563);
(B) BPNet predictions over chrM;
(C) GM12878 ChIP-seq (ENCODE ID ENCSR014YCR; antibody: Sigma HPA003384, Lot ID R04563);
(D) K562 ChIP-seq (ENCODE ID ENCSR972ZBV; antibody: Sigma HPA003384, Lot ID R04563);
(E) HepG2 ChIP-seq (ENCODE ID ENCSR516DDO; antibody: Sigma F1804, Lot ID SLBK1346V).
} 
\label{Fig6}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig9-CREB1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the CREB1 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 ChIP-seq (ENCODE ID ENCSR112ALD; antibody: Cell Signaling 9197S, Lot ID 16);
(B) BPNet predictions over chrM;
(C) HepG2 CETCH-seq (ENCODE ID ENCSR027JQN);
(D) MCF-7 ChIP-seq (ENCODE ID ENCSR897JAS; antibody: Cell Signaling 9197S, Lot ID 16);
(E) GM23338 CETCH-seq (ENCODE ID ENCSR214ZAV);
(F) K562 ChIP-seq (ENCODE ID ENCSR000BSO; antibody: Santa Cruz Biotech sc-240, Lot ID C2306);
(G) GM12878 ChIP-seq (ENCODE ID ENCSR000BUF; antibody: Santa Cruz Biotech sc-240, Lot ID C2306);
(H) H1-hESC ChIP-seq (ENCODE ID ENCSR000BSN; antibody: Santa Cruz Biotech sc-240, Lot ID C2306);
(I) Ishikawa ChIP-seq (ENCODE ID ENCSR000BUR; antibody: Santa Cruz Biotech sc-240, Lot ID C2306);
(J) K562 CETCH-seq (ENCODE ID ENCSR016RFR).
} 
\label{Fig9}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig12-FOS.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the FOS transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) MCF-7 ChIP-seq (ENCODE ID ENCSR569XNP; antibody: Santa Cruz Biotech sc-7202, Lot ID K0810);
(B) BPNet predictions over chrM;
(C) K562 ChIP-seq (ENCODE ID ENCSR000FAI; antibody: Santa Cruz Biotech sc-7202, Lot ID K0810);
(D) IMR-90 ChIP-seq (ENCODE ID ENCSR124AIG; antibody: Santa Cruz Biotech sc-7202, Lot ID K0810);
(E) endothelial cell of umbilical vein ChIP-seq (ENCODE ID ENCSR000EVU; antibody: Santa Cruz Biotech sc-7202, Lot ID K0810);
(F) GM12878 ChIP-seq (ENCODE ID ENCSR000EYZ; antibody: Santa Cruz Biotech sc-7202, Lot ID K0810);
(G) HeLa-S3 ChIP-seq (ENCODE ID ENCSR000EZE; antibody: Santa Cruz Biotech sc-7202, Lot ID K0810).
} 
\label{Fig12}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig39-FOSL1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the FOSL1 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR239ZLZ; GFP-tagged);
(B) BPNet predictions over chrM;
(C) K562 CETCH-seq (ENCODE ID ENCSR000BMV);
(D) H1-hESC ChIP-seq (ENCODE ID ENCSR000BNS; antibody: Santa Cruz Biotech sc-183, Lot ID I0809);
(E) HCT116 ChIP-seq (ENCODE ID ENCSR000BTE; antibody: Santa Cruz Biotech sc-183, Lot ID I0809);
(F) HepG2 CETCH-seq (ENCODE ID ENCFF660RIA).
} 
\label{Fig39}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig13-FOSL2.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the FOSL2 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) A549 CETCH-seq (ENCODE ID ENCSR448TVS);
(B) BPNet predictions over chrM;
(C) A549 ChIP-seq (ENCODE ID ENCSR593DGU; antibody: Santa Cruz Biotech sc-160, Lot ID H2713);
(D) A549 EtOH 1 hour ChIP-seq (ENCODE ID ENCSR000BQO; antibody: Santa Cruz Biotech sc-604);
(E) HepG2 ChIP-seq (ENCODE ID ENCSR000BHP; antibody: Santa Cruz Biotech sc-604);
(F) A549 Dex 8 hours ChIP-seq (ENCODE ID ENCSR242EWU; antibody: Santa Cruz Biotech sc-160, Lot ID H2713);
(G) MCF-7 CETCH-seq (ENCODE ID ENCSR546KCN);
(H) MCF-7 ChIP-seq (ENCODE ID ENCSR000BUI; antibody: Santa Cruz Biotech sc-604);
(I) SK-N-SH ChIP-seq (ENCODE ID ENCSR000BVB; antibody: Santa Cruz Biotech sc-604);
(J) HepG2 CETCH-seq (ENCODE ID ENCSR249EYB).
} 
\label{Fig13}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig14-CEBPB-1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the CEBPB transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 CETCH-seq (ENCODE ID ENCSR416QLJ);
(B) BPNet predictions over chrM;
(C) K562 ChIP-seq (ENCODE ID ENCSR000EHE; antibody: Santa Cruz Biotech sc-150, Lot ID I1010);
(D) IMR-90 ChIP-seq (ENCODE ID ENCSR000EFM; antibody: Santa Cruz Biotech sc-150, Lot ID I1010);
(E) HepG2 ChIP-seq (ENCODE ID ENCSR000EEE; antibody: Santa Cruz Biotech sc-150, Lot ID I1010);
(F) HeLa-S3 ChIP-seq (ENCODE ID ENCSR000EDA; antibody: Santa Cruz Biotech sc-150, Lot ID I1010);
(G) HepG2 forskolin ChIP-seq (ENCODE ID ENCSR000EEX; antibody: Santa Cruz Biotech sc-150, Lot ID I1010);
(H) GM12878 ChIP-seq (ENCODE ID ENCSR000BRX; antibody: Santa Cruz Biotech sc-150, Lot ID I1010);
(I) H1-hESC ChIP-seq (ENCODE ID ENCSR000EBV; antibody: Santa Cruz Biotech sc-150, Lot ID I1010);
(J) HepG2 ChIP-seq (ENCODE ID ENCSR000BQI; antibody: Santa Cruz Biotech sc-150, Lot ID I1010).
} 
\label{Fig14a}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig14-CEBPB-2.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the CEBPB transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HCT116 ChIP-seq (ENCODE ID ENCSR000BSD; antibody: Santa Cruz Biotech sc-150, Lot ID I1010);
(B) Ishikawa ChIP-seq (ENCODE ID ENCSR000BTT; antibody: Santa Cruz Biotech sc-150, Lot ID I1010);
(C) MCF-7 ChIP-seq (ENCODE ID ENCSR000BSR; antibody: Santa Cruz Biotech sc-150, Lot ID I1010);
(D) A549 ChIP-seq (ENCODE ID ENCSR000BUB; antibody: Santa Cruz Biotech sc-150, Lot ID I1010);
(E) A549 ChIP-seq (ENCODE ID ENCSR000DYI; antibody: Santa Cruz Biotech sc-150, Lot ID I1010);
(F) A549 ChIP-seq (ENCODE ID ENCSR701TCU; antibody: Santa Cruz Biotech sc-150, Lot ID I1010);
(G) A549 dexamethasone 0.5 hours ChIP-seq (ENCODE ID ENCSR447ZMS; antibody: Santa Cruz Biotech sc-150, Lot ID D2315);
(H) A549 dexamethasone 2 hours ChIP-seq (ENCODE ID ENCSR182OZC; antibody: Santa Cruz Biotech sc-150, Lot ID D2315);
(I) A549 dexamethasone 3 hours ChIP-seq (ENCODE ID ENCSR216GEB; antibody: Santa Cruz Biotech sc-150, Lot ID D2315);
(J) A549 dexamethasone 4 hours ChIP-seq (ENCODE ID ENCSR606ZTC; antibody: Santa Cruz Biotech sc-150, Lot ID D2315);
(K) A549 dexamethasone 7 hours ChIP-seq (ENCODE ID ENCFF887KKK; antibody: Santa Cruz Biotech sc-150, Lot ID D2315);
(L) A549 dexamethasone 8 hours ChIP-seq (ENCODE ID ENCSR474DCX4; antibody: Santa Cruz Biotech sc-150, Lot ID D2315).
} 
\label{Fig14b}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig15-CEBPG.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the CEBPG transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 CETCH-seq (ENCODE ID ENCSR620VIC);
(B) BPNet predictions over chrM;
(C) HepG2 ChIP-seq (ENCODE ID ENCSR639IIZ);
(D) MCF-7 ChIP-seq (ENCODE ID ENCSR094ZCF).
} 
\label{Fig15}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig31-JUN.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the JUN transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 ChIP-seq (ENCODE ID ENCSR000EEK; antibody: Santa Cruz Biotech sc-1694, Lot ID C2206);
(B) BPNet predictions over chrM;
(C) endothelial cell of  umbilical  vein ChIP-seq (ENCODE ID ENCSR000EFA; antibody: Santa Cruz Biotech sc-1694, Lot ID C2206);
(D) MCF-7 ChIP-seq (ENCODE ID ENCSR176EXN; antibody: Santa Cruz Biotech sc-1694, Lot ID C2206);
(E) HeLa-S3 ChIP-seq (ENCODE ID ENCSR000EDG; antibody: Santa Cruz Biotech sc-1694, Lot ID C2206);
(F) A549 ChIP-seq (ENCODE ID ENCSR996DUT; antibody: Santa Cruz Biotech sc-1694, Lot ID C2206);
(G) K562 ChIP-seq (ENCODE ID ENCSR000EFS; antibody: Santa Cruz Biotech sc-1694, Lot ID C2206);
(H) HepG2 CETCH-seq (ENCODE ID ENCSR747VUU);
(I) H1-hESC ChIP-seq (ENCODE ID ENCSR000ECA; antibody: Santa Cruz Biotech sc-1694, Lot ID C2206).
} 
\label{Fig31}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig32-JUND.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the JUND transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR000DJX; GFP-tagged);
(B) BPNet predictions over chrM;
(C) K562 ChIP-seq (ENCODE ID ENCSR000DJX; antibody: GFP-tagged);
(D) HepG2 ChIP-seq (ENCODE ID ENCSR000EEI; antibody: Santa Cruz Biotech sc-74);
(E) HeLa-S3 ChIP-seq (ENCODE ID ENCSR000EDH; antibody: Santa Cruz Biotech sc-74);
(F) HepG2 ChIP-seq (ENCODE ID ENCSR000BGK; antibody: Santa Cruz Biotech sc-74);
(G) SK-N-SH ChIP-seq (ENCODE ID ENCSR000BSK; antibody: Santa Cruz Biotech sc-74);
(H) SK-N-SH ChIP-seq (ENCODE ID ENCSR000EIB; antibody: Santa Cruz Biotech sc-74);
(I) GM12878 ChIP-seq (ENCODE ID ENCSR000DYS; antibody: Santa Cruz Biotech sc-74).
} 
\label{Fig32}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig33-JUND.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the JUND transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HCT116 ChIP-seq (ENCODE ID ENCSR000BSA; antibody: Santa Cruz Biotech sc-74);
(B) H1-hESC ChIP-seq (ENCODE ID ENCSR000BKP; antibody: Santa Cruz Biotech sc-74);
(C) Liver ChIP-seq (ENCODE ID ENCSR837GTK; antibody: Santa Cruz Biotech sc-74);
(D) MCF-7 ChIP-seq (ENCODE ID ENCSR000BSU; antibody: Santa Cruz Biotech sc-74);
(E) T47D ChIP-seq (ENCODE ID ENCSR000BVO; antibody: Santa Cruz Biotech sc-74);
(F) A549 ChIP-seq (ENCODE ID ENCSR000BRF; antibody: Santa Cruz Biotech sc-74).
} 
\label{Fig33}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig34-MAFF.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the MAFF transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR000EGI; antibody: Sigma M8194, Lot ID 125K4837);
(B) BPNet predictions over chrM;
(C) HepG2 ChIP-seq (ENCODE ID ENCSR000EEC; antibody: Sigma M8194, Lot ID 125K4837);
(D) HeLa-S3 ChIP-seq (ENCODE ID ENCSR140DSL; antibody: Sigma M8194, Lot ID 125K4837).
} 
\label{Fig34}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig35-MAFG.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the MAFG transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 CETCH-seq (ENCODE ID ENCSR818DQV);
(B) BPNet predictions over chrM;
(C) HepG2 CETCH-seq (ENCODE ID ENCSR708KAA).
} 
\label{Fig35}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig36-MAFK.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the MAFK transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) A549 ChIP-seq (ENCODE ID ENCSR541WQI; antibody: Abcam ab50322, Lot ID 904274);
(B) BPNet predictions over chrM;
(C) GM12878 ChIP-seq (ENCODE ID ENCSR000DYV; antibody: Abcam ab50322, Lot ID 904274);
(D) H1-hESC ChIP-seq (ENCODE ID ENCSR000EBS; antibody: Abcam ab50322, Lot ID 904274);
(E) IMR-90 ChIP-seq (ENCODE ID ENCSR000EFH; antibody: Abcam ab50322, Lot ID 904274);
(F) HeLa-S3 ChIP-seq (ENCODE ID ENCSR000ECK; antibody: Abcam ab50322, Lot ID 904274);
(G) K562 ChIP-seq (ENCODE ID ENCSR000EGX; antibody: Abcam ab50322, Lot ID 904274);
(H) MCF-7 ChIP-seq (ENCODE ID ENCSR555PBN; antibody: Abcam ab50322, Lot ID 904274);
(I) HepG2 ChIP-seq (ENCODE ID ENCSR000EDZ; antibody: Santa Cruz Biotech sc-477, Lot ID K1709).
} 
\label{Fig36}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig54-NFE2.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the NFE2 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR000FAF; antibody: Santa Cruz Biotech sc-291, Lot ID D1703);
(B) BPNet predictions over chrM;
(C) K562 CETCH-seq (ENCODE ID ENCSR552YGL);
(D) HepG2 CETCH-seq (ENCODE ID ENCSR983FBD);
(E) GM12878 ChIP-seq (ENCODE ID ENCSR000DZY; antibody: Santa Cruz Biotech sc-22827).
} 
\label{Fig54}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig20-NFE2L1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the NFE2L1 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR632SHZ; GFP-tagged);
(B) BPNet predictions over chrM;
(C) HepG2 CETCH-seq (ENCODE ID ENCSR543SBE).
} 
\label{Fig20}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig42-NFE2L2.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the NFE2L2 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 ChIP-seq (ENCODE ID ENCSR488EES; antibody: Santa Cruz Biotech sc-13032, Lot ID A1711);
(B) BPNet predictions over chrM;
(C) IMR-90 ChIP-seq (ENCODE ID ENCSR197WGI; antibody: Santa Cruz Biotech sc-13032, Lot ID A1711);
(D) A549 ChIP-seq (ENCODE ID ENCSR584GHV; antibody: Santa Cruz Biotech sc-13032, Lot ID A1711);
(E) HeLa-S3 ChIP-seq (ENCODE ID ENCSR707IUN; antibody: Santa Cruz Biotech sc-13032, Lot ID A1711).
} 
\label{Fig42}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig43-NRL.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the NRL transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 CETCH-seq (ENCODE ID ENCSR518KLO);
(B) BPNet predictions over chrM.
} 
\label{Fig43}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig8-BHLHE40.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the BHLHE40 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) IMR-90 ChIP-seq (ENCODE ID ENCSR957KYB; antibody: Novus NB100-1800, Lot ID A1);
(B) BPNet predictions over chrM;
(C) A549 ChIP-seq (ENCODE ID ENCSR000DYJ; antibody: Santa Cruz Biotech sc-101023);
(D) GM12878 ChIP-seq (ENCODE ID ENCSR987MTA; antibody: Novus NB100-1800, Lot ID A1);
(E) HepG2 ChIP-seq (ENCODE ID ENCSR000BID; antibody: Santa Cruz Biotech sc-101023);
(F) K562 ChIP-seq (ENCODE ID ENCSR000EGV; antibody: Novus NB100-1800, Lot ID A1);
(G) HEK293T ChIP-seq (ENCODE ID ENCSR789GVU; antibody: Novus NB100-1800, Lot ID A1).
} 
\label{Fig8}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig37-MAX.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the MAX transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR000FAE; antibody: Santa Cruz Biotech sc-197, Lot ID J0809);
(B) BPNet predictions over chrM;
(C) A549 ChIP-seq (ENCODE ID ENCSR000DYG; antibody: Santa Cruz Biotech sc-197, Lot ID J0809);
(D) A549 ChIP-seq (ENCODE ID ENCSR000BTJ; antibody: Santa Cruz Biotech sc-197, Lot ID J0809);
(E) HepG2 ChIP-seq (ENCODE ID ENCSR000EDS; antibody: Santa Cruz Biotech sc-197, Lot ID J0809);
(F) endothelial cell of umbilical vein ChIP-seq (ENCODE ID ENCSR000EEZ; antibody: Santa Cruz Biotech sc-197, Lot ID J0809);
(G) HepG2 CETCH-seq (ENCODE ID ENCSR168DYA);
(H) GM12878 ChIP-seq (ENCODE ID ENCSR000DZF; antibody: Santa Cruz Biotech sc-197, Lot ID J0809);
(I) HCT116 ChIP-seq (ENCODE ID ENCSR000BSH; antibody: Santa Cruz Biotech sc-197, Lot ID J0809);
(J) H1-hESC ChIP-seq (ENCODE ID ENCSR000EUP; antibody: Santa Cruz Biotech sc-197, Lot ID J0809).
} 
\label{Fig37}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=13.875cm]{Fig38-MAX.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the MAX transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HeLa-S3 ChIP-seq (ENCODE ID ENCSR000EZF; antibody: Santa Cruz Biotech sc-197, Lot ID J0809);
(B) Ishikawa ChIP-seq (ENCODE ID ENCSR000BTY; antibody: Santa Cruz Biotech sc-197, Lot ID J0809);
(C) Liver ChIP-seq (ENCODE ID ENCSR521IID; antibody: Santa Cruz Biotech sc-197, Lot ID J0809);
(D) MCF-7 ChIP-seq (ENCODE ID ENCSR000BUL; antibody: Santa Cruz Biotech sc-197, Lot ID J0809);
(E) SK-N-SH ChIP-seq (ENCODE ID ENCSR000BVD; antibody: Santa Cruz Biotech sc-197, Lot ID J0809).
} 
\label{Fig38}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig40-MITF.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the MITF transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR797SWM; antibody: Active Motif 39789, Lot ID 11313002);
(B) BPNet predictions over chrM.
} 
\label{Fig40}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig49-SREBF1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the SREBF1 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) A549 ChIP-seq (ENCODE ID ENCSR897MYK; antibody: Santa Cruz Biotech sc-8984, Lot ID 10211);
(B) BPNet predictions over chrM;
(C) GM12878 ChIP-seq (ENCODE ID ENCSR000DYU; antibody: Santa Cruz Biotech sc-8984, Lot ID 10211);
(D) HepG2 (10 $\mu$M insulin, 100 $\mu$M 22-hydroxycholesterol, 6 hours post-treatment) ChIP-seq (ENCODE ID ENCSR000EEO; antibody: Santa Cruz Biotech sc-8984, Lot ID 10211);
(E) HepG2 (2 $\mu$M pravastatin, 16 hours post-treatment) ChIP-seq (ENCODE ID ENCSR000EZP; antibody: Santa Cruz Biotech sc-8984, Lot ID 10211).
} 
\label{Fig49}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig45-RFX1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the RFX1 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) MCF-7 ChIP-seq (ENCODE ID ENCSR788XNX; antibody: Santa Cruz Biotech sc-10652, Lot ID E1911);
(B) BPNet predictions over chrM;
(C) K562 ChIP-seq (ENCODE ID ENCSR968GIB; antibody: Santa Cruz Biotech sc-10652, Lot ID E1911);
(D) HepG2 ChIP-seq (ENCODE ID ENCSR928API; antibody: Santa Cruz Biotech sc-10652, Lot ID E1911).
} 
\label{Fig45}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig46-RFX5.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the RFX5 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR000EGO; antibody: Rockland 200-401-194, Lot ID 14562);
(B) BPNet predictions over chrM;
(C) IMR-90 ChIP-seq (ENCODE ID ENCSR000EFD; antibody: Rockland 200-401-194, Lot ID 14562);
(D) HepG2 ChIP-seq (ENCODE ID ENCSR000EEA; antibody: Rockland 200-401-194, Lot ID 14562);
(E) SK-N-SH ChIP-seq (ENCODE ID ENCSR000EHY; antibody: Rockland 200-401-194, Lot ID 14562);
(F) GM12878 ChIP-seq (ENCODE ID ENCSR000DZW; antibody: Rockland 200-401-194, Lot ID 14562);
(G) H1-hESC ChIP-seq (ENCODE ID ENCSR000ECF; antibody: Rockland 200-401-194, Lot ID 14562);
(H) A549 ChIP-seq (ENCODE ID ENCSR064LJN; antibody: Rockland 200-401-194, Lot ID 14562);
(I) HeLa-S3 ChIP-seq (ENCODE ID ENCSR000ECX; antibody: Rockland 200-401-194, Lot ID 14562);
(J) MCF-7 ChIP-seq (ENCODE ID ENCSR924TVL; antibody: Rockland 200-401-194, Lot ID 14562).
} 
\label{Fig46}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig16-FOXA1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the FOXA1 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) Liver ChIP-seq (ENCODE ID ENCSR324RCI; antibody: Santa Cruz Biotech sc-6553, Lot ID H1209);
(B) BPNet predictions over chrM;
(C) A549 Dex 1 hour ChIP-seq (ENCODE ID ENCSR000BPX; antibody: Santa Cruz Biotech sc-101058, Lot ID A2706);
(D) A549 EtOH 1 hour ChIP-seq (ENCODE ID ENCSR000BRD; antibody: Santa Cruz Biotech sc-101058, Lot ID A2706);
(E) HepG2 CETCH-seq (ENCODE ID ENCSR865RXA);
(F) MCF-7 ChIP-seq (ENCODE ID ENCSR126YEB; antibody: GeneTex GTX100308, Lot ID 39435);
(G) HepG2 ChIP-seq (ENCODE ID ENCSR000BLE; antibody: Santa Cruz Biotech sc-6553, Lot ID H1209);
(H) K562 ChIP-seq (ENCODE ID ENCSR819LHG; antibody: GeneTex GTX100308, Lot ID 39435).
} 
\label{Fig16}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig17-FOXA2.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the FOXA2 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) Liver ChIP-seq (ENCODE ID ENCSR080XEY; antibody: Santa Cruz Biotech sc-6554, Lot ID L1409);
(B) BPNet predictions over chrM;
(C) A549 Dex 1 hour ChIP-seq (ENCODE ID ENCSR000BRE; antibody: Santa Cruz Biotech sc-6554, Lot ID L1409);
(D) HepG2 CETCH-seq (ENCODE ID ENCSR490AMH);
(E) HepG2 ChIP-seq (ENCODE ID ENCSR000BNI; antibody: Santa Cruz Biotech sc-6554, Lot ID L1409).
} 
\label{Fig17}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig53-ZNF316.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ZNF316 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR167KBO; antibody: Bethyl Labs A303-248A);
(B) BPNet predictions over chrM;
(C) K562 ChIP-seq (ENCODE ID ENCSR200JYP; antibody: Bethyl Labs A303-249A).
} 
\label{Fig53}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig18-DZIP1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the DZIP1 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 CETCH-seq (ENCODE ID ENCSR895KNN);
(B) BPNet predictions over chrM.
} 
\label{Fig18}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig19-HIVEP1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the HIVEP1 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 CETCH-seq (ENCODE ID ENCSR697WMX);
(B) BPNet predictions over chrM;
(C) K562 CETCH-seq (ENCODE ID ENCSR947PJZ).
} 
\label{Fig19}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig23-ZNF225.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ZNF225 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 CETCH-seq (ENCODE ID ENCSR075PWK);
(B) BPNet predictions over chrM.
} 
\label{Fig23}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig51-ZNF263.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ZNF263 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR000EWN; antibody: Novus H00010127-A01);
(B) BPNet predictions over chrM;
% NOT REPLICATED
(C) HEK293 ChIP-seq (ENCODE ID ENCSR000EVD; antibody: Novus H00010127-A01);
(D) HepG2 CETCH-seq.
} 
\label{Fig51}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig24-ZNF274.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ZNF274 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 CETCH-seq (ENCODE ID ENCSR871VNN);
(B) BPNet predictions over chrM;
% NOT REPLICATED
(C) HepG2 ChIP-seq (ENCODE ID ENCSR000EVR; antibody: Abnova H00010782-A01, Lot ID 060729QCS1);
(D) K562 ChIP-seq (ENCODE ID ENCSR000EVX; antibody: Abnova H00010782-M01, Lot ID 08064-4C12);
(E) H1-hESC ChIP-seq (ENCODE ID ENCSR000EUN; antibody: Abnova H00010782-M01, Lot ID 08064-4C12);
(F) HeLa-S3 ChIP-seq (ENCODE ID ENCSR000EVG; antibody: Abnova H00010782-A01, Lot ID 060729QCS1); 
(G) HCT116 ChIP-seq (ENCODE ID ENCSR101FJM; antibody: Abnova H00010782-A01, Lot ID 060729QCS1);
(H) HEK293 CETCH-seq (ENCODE ID ENCSR178QVJ);
(I) HEK293 ChIP-seq  (ENCODE ID ENCSR000FCI; antibody: Abnova H00010782-A01, Lot ID 060729QCS1);
(J) GM12878 ChIP-seq (ENCODE ID ENCSR000EUK; antibody: Abnova H00010782-A01, Lot ID 060729QCS1).
} 
\label{Fig24}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig52-ZNF280B.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ZNF280B transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 CETCH-seq (ENCODE ID ENCSR940EZR);
(B) BPNet predictions over chrM.
} 
\label{Fig52}
\end{figure*}



\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig25-ZNF350.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ZNF350 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 CETCH-seq (ENCODE ID ENCSR842SRB);
(B) BPNet predictions over chrM;
(C) HEK293 CETCH-seq (ENCODE ID ENCSR854ORP).
} 
\label{Fig25}
\end{figure*}


\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig26-ZNF598.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ZNF598 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 ChIP-seq (ENCODE ID ENCSR173NAL; antibody: Sigma F1804, Lot ID SLBK1346V);
(B) BPNet predictions over chrM.
}
\label{Fig26}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig27-ZNF768.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ZNF768 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 CETCH-seq (ENCODE ID ENCSR181ABP);
(B) BPNet predictions over chrM;
(C) HEK293 CETCH-seq (ENCODE ID ENCSR070HWF).
} 
\label{Fig27}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig28-ZNF839.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ZNF839 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 CETCH-seq (ENCODE ID ENCSR540LPD);
(B) BPNet predictions over chrM.
}
\label{Fig28}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig29-ZNF891.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ZNF891 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 CETCH-seq (ENCODE ID ENCSR020CLV);
(B) BPNet predictions over chrM.
% NOT REPLICATED
} 
\label{Fig29}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig22-THAP9.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the THAP9 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 CETCH-seq (ENCODE ID ENCSR123GPC);
(B) BPNet predictions over chrM.
} 
\label{Fig22}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig21-NFKB2.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the NFKB2 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 CETCH-seq (ENCODE ID ENCSR164YJZ);
(B) BPNet predictions over chrM.
} 
\label{Fig21}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig48-RXRA.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the RXRA transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 CETCH-seq (ENCODE ID ENCSR416HDG);
(B) BPNet predictions over chrM;
(C) HepG2 ChIP-seq (ENCODE ID ENCSR000BHU; antibody: Santa Cruz Biotech sc-553, Lot ID C1811);
(D) GM12878 ChIP-seq (ENCODE ID ENCSR000BJD; antibody: Santa Cruz Biotech sc-553, Lot ID C1811);
(E) H1-hESC ChIP-seq (ENCODE ID ENCSR000BJW; antibody: Santa Cruz Biotech sc-553, Lot ID C1811);
(F) Liver ChIP-seq (ENCODE ID ENCSR352QSB; antibody: Santa Cruz Biotech sc-553, Lot ID C1811);
(G) SK-N-SH ChIP-seq (ENCODE ID ENCSR000BVG; antibody: Santa Cruz Biotech sc-553, Lot ID C1811).
} 
\label{Fig48}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig47-RUNX3.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the RUNX3 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) GM12878 ChIP-seq (ENCODE ID ENCSR000BRI; antibody: Santa Cruz Biotech sc-101553, Lot ID B0909);
(B) BPNet predictions over chrM.
} 
\label{Fig47}
\end{figure*}


\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig30-HSF1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the HSF1 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 forskolin + 1mM pyruvate 6 hours, ChIP-seq (ENCODE ID ENCSR000EET; antibody: Santa Cruz Biotech sc-9144);
(B) BPNet predictions over chrM;
(C) GM12878 ChIP-seq (ENCODE ID ENCSR009MBP; antibody: Santa Cruz Biotech sc-9144);
(D) MCF-7 ChIP-seq (ENCODE ID ENCSR062HDL; antibody: Santa Cruz Biotech sc-9144).
} 
\label{Fig30}
\end{figure*}


\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig41-MEIS2.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the MEIS2 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR851BNE; antibody: Sigma HPA003256, Lot ID
R89735);
(B) BPNet predictions over chrM.
} 
\label{Fig41}
\end{figure*}



\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig44-PKNOX1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the PKNOX1 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR115SMW; antibody: GeneTex GTX114991, Lot ID 40870);
(B) BPNet predictions over chrM;
(C) GM12878 ChIP-seq (ENCODE ID ENCSR711XNY; antibody: GeneTex GTX114991, Lot ID 40870);
(D) HEK293T ChIP-seq (ENCODE ID ENCSR233FAG; antibody: GeneTex GTX114991, Lot ID 40870);
(E) MCF-7 ChIP-seq (ENCODE ID ENCSR986XYK; antibody: GeneTex GTX114991, Lot ID 40870).
} 
\label{Fig44}
\end{figure*}

\section*{Introduction}

Mitochondria contain their own genome\cite{Nass1965} (mtDNA/chrM), usually circular in topology and highly compact, especially in metazoans. The mammalian chrM is around 16--17 kbp in size and encodes 13 proteins (all of them components of electron transport chains), 22 tRNAs and two rRNAs\cite{Anderson1981,Bibb1981}. Its organization is peculiar compared to that of the nuclear genome, and it is replicated, transcribed and regulated by its own dedicated set of information processing factors. The 13 genes and two rRNAs are densely packed, with only one notable non-coding region (NCR) -- the so-called displacement or D-loop\cite{Shadel1997}. The D-loop is the site of transcription initiation, which produces long polycistronic transcripts from both strands (referred to as the H- for ``heavy'' and L- for ``light''), and contains three promoters -- a light strand (LSP) and two heavy strand (HSP1 and HSP2) promoters\cite{Cantatore1980,Montoya1982}. Transcription is carried out by the mitochondrial-specific nuclear-encoded POLRMT RNA polymerase\cite{Tiranti1997}, with several additional nuclear genome-encoded factors -- TFAM\cite{Fisher1988,Fisher1985,Wang2013}, TFB1M, and TFB2M\cite{Falkenberg2002,Gaspari2004} -- also involved in the process of initiation\cite{Shutt2011}.

The modern organization of the mitochondrial genome is the result of a combination of extreme reduction and massive endosymbiotic gene transfer (EGT). Mitochondria originated very early in eukaryote evolution as a result of the establishment of endosymbiosis with their prokaryotic ancestor\cite{Mereschkowski1905,Mereschkowski1910,Sagan1967,Schwartz1978,Yang1985,Gray1999}, which was most likely a member of the $\alpha$-proteobacteria clade\cite{Andersson1998,Gray1999,Esser2004,Fitzpatrick2006}. Subsequently, the endosymbiont lost the vast majority of its genes, either outright, or through transfer to the nuclear genome, from which their products were then imported back into the mitochondrion.

In addition to the well-characterized bona fide mitochondrial transcription and replication factors, since the 1990s there have been reports suggesting that nuclear transcription factors may also moonlight as direct regulators of events in mitochondria\cite{LeighBrown2010,Rubalcava2023,Marinov2014,Blumberg2014}. These include the glucocorticoid receptor GR\cite{Demonacos1993,Demonacos1995,Koufali2003,Psarra2006}, a 43kDa isoform of the thyroid hormone T$_3$ receptor T$_3$R$\alpha$1 called p43\cite{Wrutniak1995,Casas1999,Enriquez1999a,Enriquez1999b}, the CREB TF\cite{Cammarota1999,Lee2005,Ryu2005,DeRasmo2009}, the tumor suppressor transcription factor p53\cite{Marchenko2000,Yoshida2003,Heyne2004,Achanta2005}, the estrogen receptor ER\cite{Monje2001,Chen2004}, STAT3\cite{Wegrzyn2009}, AP-1 and PPAR$\gamma$2\cite{Casas2000,Ogita2002,Ogita2003}, as well as MEF2D in mouse\cite{She2011}.

However, direct \textit{in vivo} evidence for occupancy of mtDNA by these factors was provided only for a handful by these original studies (e.g. CREB\cite{Lee2005} and p53\cite{Achanta2005}), and it was limited to only the D-loop region. The advent of high resolution techniques for genome-wide profiling of DNA-protein interactions such as ChIP-seq (Chromatin Immunoprecipitation coupled with deep sequencing\cite{Johnson2007,Mikkelsen2007,Barski2007,Robertson2007}) eventually enabled the direct examination of evidence for mtDNA occupancy by a large number of nuclear TFs.

At the end of the second phase of the ENCODE Project, we and others\cite{Blumberg2014,Marinov2014} carried out a comprehensive survey of the existing at the time ChIP-seq datasets generated by the ENCODE, mouseENCODE and modENCODE efforts\cite{ENCODE101,ENCODE2012,mouseENCODE101,Gerstein2010,Negre2011,modENCODE2010,Yue2014} in human, mouse, the worm \textit{C. elegans} and the fly \textit{D. melanogaster}. Eight human TFs were identified as showing strong evidence for mtDNA occupancy (JUN, JUND, CEBPB, MAX, MafF, MafK, NFE2 and RFX5), three mouse TFs (MafK, MafF and Usf2), and no fly or worm ones. Furthermore, Blumberg et al.\cite{Blumberg2014} demonstrated directly the localization to mitochondria of JUN and JUND in HepG2 cells using immuno-gold labeling and electron microscopy while Marinov et al.\cite{Marinov2014} showed MAFK localizing to mitochondria using immunocytochemical staining. Examination of available ChIP-seq data for TFs previously proposed to act in mitochondria (GR, ER$\alpha$, CREB, STAT3, p53) found no putative occupancy sites.

However, these studies did not reveal any obvious mechanisms through which these nuclear TFs might act to regulate mitochondrial transcription, as all the identified ChIP-seq peaks are away from the D-loop in the middle of the transcriptional units. The D-loop itself shows up as ``enriched'' in practically all ChIP-seq datasets, but this is almost certainly an experimental artifact as the ChIP signal there does not show the characteristics of proper occupancy sites. Thus the question about the potential role of nuclear TFs in mitochondria remains open and unresolved\cite{LeighBrown2010,Rubalcava2023}.

With the completion of the third\cite{ENCODE2020} and fourth phases of the ENCODE project, a vastly expanded collection of ChIP-seq datasets has now become available, encompassing an order-of-magnitude larger sampling of the human TF repertoire. Furthermore, many TFs have now been assayed using multiple different reagents or using endogenous tagging, thus potentially providing distinct lines of evidence for mtDNA occupancy, and powerful deep learning-based tools for analyzing the sequence patterns driving TF occupancy and predicting it from sequence have been developed\cite{Ching2018}. In this study, we take advantage of these resources, survey the expanded ENCODE collection, and identify 50 nuclear TFs plus two other chromatin proteins exhibiting more or less robustly supported peaks over chrM. On the other hand, the picture revealed by the expanded collection is more complicated than previously perceived as in many cases occupancy profiles are not replicated in all cases where multiple immune reagents have been used to assay the same TF. We discuss the currently most reliable set of mtDNA-associated nuclear TFs, as well as the evidential criteria for establishing chrM occupancy using ChIP and other experimental methods.

% To do: 
% - match against DNAse footprints

\section*{Results}

\subsection*{Evaluating the evidence for mitochondrial occupancy of nuclear TFs in the expanded ENCODE ChIP-seq collection}

According to the latest census of human transcription factors\cite{Lambert2018}, the human genome encodes 1,742 sequence-specific TFs, belonging to $\sim$60 different families defined by their DNA binding domains (DBDs). 

Our previous analysis of the dataset collection generated as part of the second phase of the ENCODE Project encompassed a total of 151 transcription factors, which represents less than 10\% of the total. After the fourth phase of ENCODE, the number of available datasets is now greatly expanded and covers $\sim$44\% of the known TFs.

In order to evaluate (and also reevaluate previous observations) putative physical associations of nuclear TFs with the mitochondrial genome, we examined 6,153 ChIP-seq datasets for 942 targets, of which 763 are sequence-specific TFs\cite{Lambert2018} (Figure \ref{Fig1}A). The additional 179 are non-sequence specific chromatin proteins, such as histone modifying enzymes and chromatin remodellers. 

The most recent ENCODE collection is also not merely a quantitative expansion. Multiple TFs have been assayed with different antibodies, which is extremely valuable because, especially when the antibodies are polyclonal, the possibility that mtDNA ChIP-seq peaks are the result of non-specific pull down can never be excluded (although that would still mean that some protein not previously known to do so is specifically occupying those regions of the mitochondrial genome). Many of the newly available TFs, but also some that have been mapped previously, have now been assayed using endogenous tagging with FLAG, GFP or HA tags, most commonly using CRISPR epitope tagging (CETCH-seq\cite{Savic2015}) and site-specific recombination\cite{OGorman1991}. We note that in CETCH-seq, tagging is carried out at the stop codon of the gene, i.e. at its C-terminus, thus it is unlikely that it would affect import into mitochondria, which is classically mediated by N-terminal targeting sequences\cite{Dudek2013,Busch2023}; however, that can nevertheless not be completely excluded as a possibility.

We updated our pipeline for analyzing mitochondrial occupancy in several ways in order to allow for more comprehensive characterization of the current TF compendium. 

First, we previously mapped reads against a joint nuclear and mitochondrial genomic index, and excluded all reads mapping to multiple locations within that combined genomic space. A substantial portion of the human chrM in the context of the \verb|hg38| assembly is affected by such mappability limitations (Supplementary Figure \ref{FigS1}). However, because of the very high copy number of mitochondrial genomes in a given cell relative to the diploid nuclear genome\cite{Bogenhagen1974}, peaks observed over chrM are nearly always much stronger than even the very top nuclear ChIP-seq peaks, as previously demonstrated\cite{Marinov2014}. Furthermore, other hallmarks of TF occupancy, such as chromatin accessibility peaks and high levels of histone marks associated with active regulatory elements, which would be expected if chrM peaks arose from mitochondrial sequences that have been inserted in the nuclear genome (so called NUMTs\cite{HazkaniCovo2010}), are not seen over the peaks observed over the mitochondrial genome. This makes it highly unlikely that they arise from nuclear TF occupancy over NUMTs. For these reasons, we now evaluate putative mitochondrial occupancy based on read alignments generated entirely in mitochondrial space.

Second, in our past work we sought corroborating evidence for the observed ChIP-seq profiles in the presence or absence of the cognate sequence recognition motifs for each TF. However, these motifs are often very short and degenerate, and thus only a small fraction of them is actually occupied in cells. In this work, we leverage the power of interpretative deep learning models to generate more reliable and specific predictions of TF occupancy profiles over chrM, which we then compare to experimental measurements. We use the state-of-the-art BPNet\cite{BPNet} profile models, which take as input genomic sequence and the forward- and reverse-strand ChIP-seq profiles, and then learn to predict these profiles as a function of genomic sequence. As part of the overall ENCODE effort, we have trained such models over the nuclear genome for all TFs for which data is available, and we used these models to predict chrM ChIP-seq profiles (see Methods for details).

Figure \ref{Fig1}B shows the observed chrM profiles for all 6,153 datasets. As discussed in our previous work\cite{Marinov2014}, the D-loop region appears as strongly ``enriched'' in nearly all ChIP datasets; this is certainly an artifact in almost all cases because the observed forward- and reverse-strand profiles do not exhibit the asymmetry around a punctate binding site that is expected from true occupancy\cite{Kharchenko2008,LandtMarinovKundaje2012,Marinov2014CCC,Marinov2017}. It is most likely that the unique triple-stranded structure of the D-loop results in preferential enrichment in sequencing libraries. We also observe a few regions of weakly elevated signal in the middle of chrM, which are also present in the majority of datasets, and are also unlikely to represent true occupancy events.

Disregarding these signals, we find some evidence for chrM occupancy for 50 sequence-specific TFs, which we discuss in detail below. In addition, two of the 179 non-sequence specific chromatin proteins also showed evidence for putative association with mtDNA.

\subsection*{bZIP TFs}

The TF family with the largest and most notable set of members with strong chrM peaks is the bZIP (Basic Leucine Zipper) domain-containing proteins. In humans, 72 such TFs are annotated in the genome, and for 39 of them there is ENCODE ChIP-seq data. 

Remarkably, nearly half of them -- 19/39 -- exhibit evidence for mtDNA occupancy (Figures \ref{Fig3}--\ref{Fig43}). These TFs are ATF2, ATF3, ATF4, ATF7, CREB1, FOS, FOSL1, FOSL2, CEBPB, CEBPG, JUN, JUND, MAFF, MAFG, MAFK, NFE2, NFE2L1, NFE2L2, and NRL.

Figure \ref{Fig3}A shows the chrM ChIP-seq profile for the ATF2 TF in the K562 cell line, revealing a strong peak, with the classic strand asymmetry features of true sequence-specific TF occupancy, in the region around the \textit{MT-ND3} and \textit{MT-ND4L} genes and the \textit{MT-TR} tRNA gene. The BPNet-predicted profile corroborates the existence of such a peak, although BPNet also predicts high ChIP-seq signal in several other locations, which is not observed in the actual data (Figure \ref{Fig3}B). The same peak is observed in HepG2 cells using the same antibody (Figure \ref{Fig3}E), and in a site-recombination-tagged experiment in the HEK293 cell line (Figure \ref{Fig3}D). It is not observed in H1-hESC using a different antibody (Figure \ref{Fig3}C), nor in a CRISPR-mediated endogenous tagging experiment in HepG2 cells (Figure \ref{Fig3}F). Thus for ATF2 we see evidence for occupancy using two different reagents (an $\alpha$-ATF2 antibody and GFP-tagging), but not with another $\alpha$-ATF2 antibody and FLAG-tagging.

Figure \ref{Fig4}A shows the chrM ChIP-seq profile for the ATF3 TF in the K562 cell line. In this case, at least four strong peaks are observed -- the same one around \textit{MT-ND3}/\textit{MT-ND4L} seen for ATF2, but also one over \textit{MT-ATP6}, one over \textit{MT-ND4}, and another over \textit{MT-CYB}. All these peaks are corroborated by BPNet predictions (Figure \ref{Fig4}B), although here too BPNet predicts additional occupancy peaks. However, none of the experiments in other cell lines -- H1-hESC, HCT116, HepG2, A549, GM12878, liver, and K562 again -- carried out with a different antibody exhibit these peaks (Figure \ref{Fig4}C--I), and neither does CRISPR-mediated FLAG-tagging (Figure \ref{Fig4}J).

Figure \ref{Fig5}A shows the chrM ChIP-seq profile for the ATF4 TF in the K562 cell line. In this case we observe a single strong peak, at the same location as the ATF3 peak over the \textit{MT-CYB} gene. Unfortunately, we were not able to train a good model for this TF, thus we do not have BPNet predictions over chrM for it (Figure \ref{Fig5}B). This peak is not seen in a CRISPR-mediated FLAG-tagging experiment in HepG2 cells (Figure \ref{Fig5}C).

Figure \ref{Fig6}A shows the chrM ChIP-seq profile for the ATF7 TF in the MCF-7 cell line. Its profile over chrM is similar to that of ATF3 (Figure \ref{Fig4}A), with four peaks. BPNet models corroborate the strong peak over \textit{MT-ND3}/\textit{MT-ND4L}, but are less concordant elsewhere in the genome. These peaks are also seen in GM12878 and K562 ChIP-seq experiments generated with the same antibody (Figure \ref{Fig6}C-D), but not in HepG2 ChIP-seq carried out with a different antibody (Figure \ref{Fig6}E).

Figure \ref{Fig9}A shows the chrM ChIP-seq profile for the CREB1 TF in the HepG2 cell line. CREB1 is notable for having been previously proposed to localize to mitochondria and play a functional role there\cite{DeRasmo2009,Lee2005}, and specifically to bind to the D-loop \cite{Ryu2005}. Just as in our previous effort\cite{Marinov2014}, we see no evidence that is unlikely to be an artifact for D-loop occupancy, but we observe a strong peak over the \textit{MT-ND1} gene, another one over \textit{MT-CO3} and several weaker others elsewhere in the genome. These match BPNet predictions qualitatively, but the magnitudes of observed and predicted signals differ significantly (Figure \ref{Fig9}B). The putative occupancy profiles are replicated in MCF-7 cells using the same antibody (Figure \ref{Fig9}D), in CRISPR FLAG-tagged HepG2 and GM23338 cells  (Figure \ref{Fig9}C and E), and also in K562 cells using a different antibody (Figure \ref{Fig9}F). However, the latter antibody was also used in datasets in GM12878, H1-hESC and Ishikawa cells (Figure \ref{Fig9}G-I) resulting in a flat profile over chrM, as is the case with CETCH-seq experiment in K562 cells  (Figure \ref{Fig9}J).

Figure \ref{Fig12}A shows the chrM ChIP-seq profile for the FOS TF in the MCF-7 cell line. Two peaks are observed -- a strong one over the \textit{MT-ATP6} gene and a weaker one at the same location as the ATF2, ATF3 and ATF7 peaks over \textit{MT-ND3}/\textit{MT-ND4L}. Both are matched by BPNet predictions while several other BPNet-predicted peaks are not observed in the data (Figure \ref{Fig12}B). The pattern is replicated in K562 cells using the same antibody  (Figure \ref{Fig12}C), but not in IMR90, endothelial cells of the umbilical
vein, GM12878 or HeLaS3, all using the same antibody (Figure \ref{Fig12}D--G).

Figure \ref{Fig39}A shows the chrM ChIP-seq profile for the FOSL1 TF in the K562 cell line; this experiment uses GFP-tagged FOSL1. The same two peaks as for FOS are observed, but neither is particularly strong. BPNet predicts a large number of peaks all over mtDNA, which are not seen in the data (Figure \ref{Fig39}B). These peaks are not replicated by K562 CETCH-seq, HepG2 CETCH-seq, and ChIP-seq using an $\alpha$-FOSL1 antibody in H1-hESC and HCT116  (Figure \ref{Fig39}C--F).

Figure \ref{Fig13}A shows the chrM ChIP-seq profile for the FOSL2 TF in the A549 cell line, using CRISPR-tagged cells. Again, the same two peaks are observed as for FOS and FOSL1. These are also predicted by BPNet (Figure \ref{Fig13}B), but the strongest BPNet prediction -- over \textit{MT-CYB} -- is not observed in the data. However, other experiments using two different antibodies in A549, MCF-7, HepG2 and SK-N-SH as well as CETCH-seq in MCF-7 and HepG2 do not show these peaks (Figure \ref{Fig13}C--J).

Figure \ref{Fig14a}A shows the chrM ChIP-seq profile for the CEBPB TF in the K562 cell line, using CRISPR-tagged cells. A strong peak is observed over the \textit{MT-ND4} gene, and a weaker one over \textit{MT-CO2}, as well as a few other weak peaks. These are corroborated by BPNet (Figure \ref{Fig14a}B); in fact BPNet predicts two distinct binding sites over \textit{MT-ND4} and two peaks associated with strand asymmetry are also seen in the CETCH-seq data. BPNet also predicts numerous other peaks that are not observed experimentally. A large number of different additional experiments are available for CEBPB (Figure \ref{Fig14a}C--J and Figure \ref{Fig14b}), using two different antibody lots. The observed putative mtDNA occupancy is replicated in IMR90 (Figure \ref{Fig14a}D), HeLaS3  (Figure \ref{Fig14a}F), HepG2  (Figure \ref{Fig14a}J), A549  (Figure \ref{Fig14b}E), but not in other experiments for HepG2 and A549 or the other cell lines -- MCF-7, HCT116, Ishikawa, H1-hESC, GM12878 and non-tagged K562 -- that have been assayed.

Figure \ref{Fig15}A shows the chrM ChIP-seq profile for the CEBPG TF in the K562 cell line, using CRISPR-tagged cells. A similar pattern to CEBPB is observed, but with a stronger peak over the \textit{MT-CYB} gene. BPNet predictions match the observed profile (Figure \ref{Fig15}B). However, CRISPR-tagged MCF-7 and HepG2 cells do not show these peaks (Figure \ref{Fig15}C--D).

Figure \ref{Fig31}A shows the chrM ChIP-seq profile for the JUN TF in the HepG2 cell line. Two strong peaks are observed for JUN -- one over the same \textit{MT-ND3}/\textit{MT-ND4L} region as seen for many other bZIP factors, and another over \textit{MT-ND4}. A weaker peak is seen over \textit{MT-ATP6}. BPNet predicts all these peaks, as well as many others (Figure \ref{Fig31}B). These peaks are also observed in endothelial cells of the umbilical vein (Figure \ref{Fig31}C), where it is to be noted that the \textit{MT-ATP6} peak is stronger than the \textit{MT-ND4L} one, but not in any of the other cell lines assayed -- MCF-7, HeLa-S3, A549, K562, H1-hESC and in a HepG2 CETCH-seq sample (Figure \ref{Fig31}D--I). All of these ChIP-seq experiments were carried out with the same antibody. JUN was one of the factors whose presence in mitochondria was conclusively confirmed through immunogold electron microscopy previously\cite{Blumberg2014}, thus the discrepancy between HepG2 ChIP-seq and CETCH-seq and it being observed only in two seemingly unrelated cell lines and not in any of the others are particularly puzzling observations.

Figure \ref{Fig32}A shows the chrM ChIP-seq profile for the JUND TF in the K562 cell line. The same three peaks are observed as for JUN, and these are also corroborated by BPNet predictions (Figure \ref{Fig32}B). Here too we observed discordance in the available datasets as these peaks are also seen in HepG2 (Figure \ref{Fig32}D) and SK-N-SH (Figure \ref{Fig32}H) cells, but not in any of the other ENCODE experiments for JUND -- HeLaS3, GM12878, HCT116, H1-hESC, liver, MCF-7, T47D, A549, and most puzzling, additional datasets in K562, HepG2 and SK-N-SH (Figure \ref{Fig32}C,E--G,I and Figure \ref{Fig33}). All of these experiments were carried out with the same Santa Cruz Biotech sc-74 antibody, except for the K562 experiments, both of which used GFP-tagged JUND. However, this antibody is polyclonal and there is no information available whether the same lot was used. It is possible the discrepancy arises as a result of lot differences; the other possibility is that the experimental protocols used are not the same as the discordant samples arise from two different production labs. As is the case with JUN, JUND's presence in mitochondria was previously verified by immunogold electron microscopy in HepG2 cells\cite{Blumberg2014}.

Figure \ref{Fig34}A shows the chrM ChIP-seq profile for the MAFF TF in the K562 cell line. Several peaks are observed -- over the \textit{MT-CO1} and \textit{MT-ND5} genes as well as over the tRNA cluster between \textit{MT-ND4} and \textit{MT-ND5}. These are also predicted by BPNet (Figure \ref{Fig34}B) together with multiple other peaks not observed in the ChIP data. The first two peaks are also observed in HepG2 and HeLa-S3 cells (all experiments carried out with the same antibody) but the latter is not.

Figure \ref{Fig35}A shows the chrM CETCH-seq profile for the MAFG TF in the K562 cell line. The three peaks observed for MAFF are also present in the MAFG profile, but in addition peaks are present over the \textit{MT-CYB} and \textit{MT-ATP6} genes as well as a weaker one over the 16S rRNA. These peaks are corroborated by BPNet predictions (Figure \ref{Fig35}B), but are not seen in a HepG2 CETCH-seq experiment.

Figure \ref{Fig36}A shows the chrM ChIP-seq profile for the MAFK TF in the A549 cell line. The same peaks are observed as those for MAFF, and they are corroborated by BPNet predictions (Figure \ref{Fig36}B). They are also observed in GM12878, IMR-90, HeLa-S3, K562, MCF-7 and HepG2 cells, but not in H1-hESC cells (Figure \ref{Fig36}C--I). Of note, they are seen in datasets generated with two different antibodies, and MAFK was previously shown to localize to mitochondria using immunocytochemical staining.

Figure \ref{Fig54}A shows the chrM ChIP-seq profile for the NFE TF in the K562 cell line. Five peaks are observed -- over the 16S rRNA gene, over \textit{MT-ATP6}, over \textit{MT-ND3}, over the tRNA cluster between \textit{MT-ND4} and \textit{MT-ND5}, and over \textit{MT-CYB}. These are all sites where peaks are seen also for other bZIP factors. Most of them are predicted by BPNet (Figure \ref{Fig54}B), and they are also seen in a K562 CETCH-seq experiment (Figure \ref{Fig54}C). On the other hand, CETCH-seq in HepG2 (Figure \ref{Fig54}D) and ChIP-seq in GM12878 generated using a different antibody (Figure \ref{Fig54}E) show no peaks.

Figure \ref{Fig20}A shows the chrM ChIP-seq profile for the NFE2L1 TF in the K562 cell line. Three strong peaks are observed in this case -- over the 16S rRNA gene, over \textit{MT-ATP6}, and over the tRNA cluster between \textit{MT-ND4} and \textit{MT-ND5}. BPNet predicts the latter two but not the one over the 16S rRNA (Figure \ref{Fig20}B). A HepG2 CETCH-seq experiment does not exhibit the same pattern (Figure \ref{Fig20}C).

Figure \ref{Fig42}A shows the chrM ChIP-seq profile for the NFE2L2 TF in the K562 cell line. It is very similar to what is observed for NFE2L1, and in this case too BPNet does not predict a 16S rRNA peak (Figure \ref{Fig42}B). These peaks are also seen in IMR-90 cells (Figure \ref{Fig42}C) and weakly in A549 and HeLaS3 cells (Figure \ref{Fig42}D--E), using the same antibody for all experiments.

Figure \ref{Fig43}A shows the chrM ChIP-seq profile for the NRL TF in a HepG2 CETCH-seq experiment. In this case, multiple, and often potentially complex multisummit peaks are observed all over the genome. They generally match BPNet predictions (Figure \ref{Fig43}B).

\subsection*{bHLH TFs}

The second major group of TFs that exhibit evidence for mtDNA occupancy comprises the basic helix--loop--helix (bHLH) transcription factors. Of 122 bHLH factors annotated in the genome, ChIP-seq data is available for 36, of which peaks over chrM are observed for four -- BHLHE40, MAX, MITF, and SREBF1.

Figure \ref{Fig8}A shows the chrM ChIP-seq profile for the BHLHE40 TF in the K562 cell line. Two peaks are observed -- over the \textit{MT-ATP6} gene and over \textit{MT-ND3}/\textit{MT-ND4L}. However, the observed profile does not match the BPNet predicted one (Figure \ref{Fig8}B), and is also not seen in any other cell line (Figure \ref{Fig8}C--G). In the GM12878 cell line a different peak is observed over the \textit{MT-ND5} gene (Figure \ref{Fig8}D); the same antibody was used for both the IMR90 and GM12878 experiments, but a different antibody was used in A549 and HepG2 cells.

Figure \ref{Fig37}A shows the chrM ChIP-seq profile for the MAX TF in the K562 cell line. Strong peaks are observed over the 16S rRNA gene and over \textit{MT-CO3}, which are also predicted by BPNet (Figure \ref{Fig37}B) together with a number of other peaks not seen in the data. Many different additional experiments are available for MAX (Figures \ref{Fig37}C--J and \ref{Fig38}) -- all generated with the same antibody, but including experiments from different production groups. These peaks are also seen in endothelial cells of umbilical vein (Figure \ref{Fig37}F) and H1-hESC (Figure \ref{Fig37}J), generated by two different production groups, but not in the rest of the experiments -- A549, HepG2 (ChIP and CETCH-seq), GM12878, HCT116, HeLaS3, Ishikawa, liver, MCF-7 and SK-N-SH.

Figure \ref{Fig42}A shows the chrM ChIP-seq profile for the MITF TF in the K562 cell line. Peaks are observed over the \textit{MT-ND2} gene and on the boundary between \textit{MT-ATP6} and \textit{MT-CO3}. However, these do not match the predicted BPNet profile (Figure \ref{Fig42}B).

Figure \ref{Fig49}A shows the chrM ChIP-seq profile for the SREBF1 TF in the A549 cell line. In this case, two potential peaks are seen at the very beginning of the 12S rRNA gene and another one over the \textit{MT-CO3} gene. These are predicted by BPNet (Figure \ref{Fig49}B), together with many other peaks not observed in the data.

\subsection*{RFX TFs}

The human genome encodes eight RFX TFs, of which ChIP-seq data now exists for four. Two of these show evidence for mtDNA occupancy -- RFX1 and RFX5.

Figure \ref{Fig45}A shows the chrM ChIP-seq profile for the RFX1 TF in the MCF-7 cell line. One peak is observed -- over the \textit{MT-CYB} gene -- where multiple peak summits are also predicted by BPNet (Figure \ref{Fig45}B). This profile is also observed in K562 cells (Figure \ref{Fig45}C), but not in HepG2 (Figure \ref{Fig45}D).

Figure \ref{Fig46}A shows the chrM ChIP-seq profile for the RFX5 TF in the K562 cell line. Three areas of elevated signal are observed -- over \textit{MT-CO2}, in the beginning of \textit{MT-ND5}, and in the \textit{MT-ND3}/\textit{MT-ND4L} region. BPNet predicts multiple strong peaks (Figure \ref{Fig46}B), two of which match the \textit{MT-CO2} and \textit{MT-ND3}/\textit{MT-ND4L} peaks, but not the \textit{MT-ND5} one. This pattern is also seen in IMR90 cells (Figure \ref{Fig46}C). In HepG2 cells, a different profile is observed -- a peak over the \textit{MT-ND1} gene (Figure \ref{Fig46}D). No peaks are seen in the available other experiments -- SK-N-SH, GM12878, H1-hESC, A549, HeLa-S3, MCF-7 (Figure \ref{Fig46}E--J), all of which were generated using the same antibody.

\subsection*{Forkhead TFs}

A total of 50 forkhead TFs are encoded in the human genome, of which 15 have been assayed by ENCODE. Two of them -- FOXA1 and FOXA2 -- show putative evidence for mtDNA occupancy.

Figure \ref{Fig16}A shows the chrM ChIP-seq profile for the FOXA1 TF in liver. A peak is observed over the tRNA cluster between \textit{MT-ND4} and \textit{MT-ND5}, which is also predicted by BPNet (Figure \ref{Fig16}B), together with many other peaks not observed in the data. However, HepG2 ChIP-seq carried out with the same antibody does not show the same peak (Figure \ref{Fig16}D), and neither does HepG2 CETCH-seq (Figure \ref{Fig16}E), nor do ChIP-seq datasets for A549, MCF-7 and K562 cells generated using two other different antibodies (Figure \ref{Fig16}C--D and F--H).

Figure \ref{Fig17}A shows the chrM ChIP-seq profile for the FOXA2 TF in liver. The same peak is observed as for FOXA1, but it is not strongly predicted by BPNet (Figure \ref{Fig17}B). As with FOXA1 it is not seen in cell line data -- A549 cells, HepG2 CETCH-seq and HepG2 ChIP-seq (Figure \ref{Fig17}C--E).

\subsection*{C2H2 zinc finger TFs}

The largest TF family in mammals is the C2H2 zinc finger factors. The human genome encodes 774 of these, of which 419 have been now assayed by ENCODE, and 12 show some evidence for mtDNA occupancy -- DZIP1, HIVEP1, ZNF225, ZNF274, ZNF350, ZNF598, ZNF768, ZNF839, ZNF891, ZNF263, ZNF280B, ZNF316. Most of these experiments have been carried out using endogenous epitope tagging as specific antibodies for all ZNFs are not available.

Figure \ref{Fig53}A shows the chrM ChIP-seq profile for the ZNF316 TF in the K562 cell line. Two strong peaks are observed -- over the \textit{MT-CO1} and \textit{MT-ND5} genes, and several weaker ones elsewhere around chrM. These are predicted by BPNet models (Figure \ref{Fig53}B). However, a second K562 experiment, carried out by the same production group but with a different antibody, does not exhibit any peaks over chrM.

DZIP1, HIVEP1, ZNF225, ZNF263, ZNF274, ZNF280B, ZNF350, ZNF598, ZNF768, ZNF839 and ZNF891 all display a similar pattern over chrM, in, respectively, HepG2 CETCH-seq (Figure \ref{Fig18}A), HepG2 CETCH-seq (Figure \ref{Fig19}A), HepG2 CETCH-seq (Figure \ref{Fig23}A), K562 ChIP-seq (Figure \ref{Fig51}A), HepG2 CETCH-seq (Figure \ref{Fig24}A), HepG2 CETCH-seq (Figure \ref{Fig52}A), HepG2 CETCH-seq (Figure \ref{Fig25}A), HepG2 ChIP-seq (Figure \ref{Fig26}A), HepG2 CETCH-seq (Figure \ref{Fig27}A), HepG2 CETCH-seq (Figure \ref{Fig28}A), and HepG2 CETCH-seq (Figure \ref{Fig29}A). Nine out of eleven of these datasets are the result of endogenous epitope tagging experiments in HepG2 cells, but two are conventional ChIP-seq using TF-specific antibodies. They all display multiple peaks all over the length of the mitochondrial genome, and they are all generally matching the predicted BPNet profiles (Figure \ref{Fig18}B; Figure \ref{Fig19}B; Figure \ref{Fig23}B; Figure \ref{Fig51}B; Figure \ref{Fig24}B; Figure \ref{Fig52}B; Figure \ref{Fig25}B; Figure \ref{Fig26}B; Figure \ref{Fig27}B; Figure \ref{Fig28}B; Figure \ref{Fig29}B).

For most of these TFs, these are the only datasets available, but in the cases where additional experiments exist, the observed peaks are not replicated -- K562 CETCH-seq for HIVEP1 (Figure \ref{Fig19}C); HEK293 ChIP-seq (Figure \ref{Fig51}C) and HepG2 CETCH-seq (Figure \ref{Fig51}D) for ZNF263; HepG2, K562, H1-hESC, HeLa-S3, HCT116, HEK293, and GM12878 ChIP-seq and HEK293 CETCH-seq for ZNF274 (Figure \ref{Fig24}C--J); HEK293 ChIP-seq for ZNF350 (Figure \ref{Fig25}C); HEK293 ChIP-seq for ZNF768 (Figure \ref{Fig27}C).

\subsection*{THAP finger TFs}

The human genome encodes 12 THAP finger TFs, for five of which datasets exist in the ENCODE collection. 

Figure \ref{Fig22}A shows the chrM CETCH-seq profile for the THAP9 TF in the HepG2 cell line. This factor displays largely the same profile as most of the C2H2 zinc finger TFs discussed above, and it too matches BPNet predictions (Figure \ref{Fig22}B).

\subsection*{Rel TFs}

Of the ten Rel TFs in the human genome, six have been assayed by ENCODE.

Figure \ref{Fig21}A shows the chrM CETCH-seq profile for the NFKB2 TF in the HepG2 cell line. This dataset too exhibits a similar pattern as THAP9 and most of the C2H2 zinc finger TFs, matched by BPNet predictions (Figure \ref{Fig21}B).

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig10-CUX1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the CUX1 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) MCF-7 ChIP-seq (ENCODE ID ENCSR017CEO; antibody: Santa Cruz Biotech sc-6327, Lot ID E0709);
(B) BPNet predictions over chrM;
(C) K562 ChIP-seq (ENCODE ID ENCSR000EFO; antibody: Santa Cruz Biotech sc-6327, Lot ID E0709);
(D) K562 CETCH-seq (ENCODE ID ENCSR178NTX);
(E) GM12878 ChIP-seq (ENCODE ID ENCSR000DYR; antibody: Santa Cruz Biotech sc-6327, Lot ID E0709).
} 
\label{Fig10}
\end{figure*}

\begin{figure*}
\begin{center}
\includegraphics[width=18.5cm]{Fig11-E2F1.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the E2F1 transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR720HUL; antibody: GeneTex GTX70165, Lot ID 19267);
(B) BPNet predictions over chrM;
(C) K562 ChIP-seq (ENCODE ID ENCSR563LLO; antibody: Cell Signaling 3742S, Lot ID 4);
(D) MCF-7 ChIP-seq (ENCODE ID ENCSR000EWX; HA-modified E2F1);
(E) HeLa-S3 ChIP-seq (ENCODE ID ENCSR000EVJ; antibody: Millipore 05-379);
(F) HepG2 CETCH-seq (ENCODE ID ENCSR717ZZW);
(G) HeLa-S3 CETCH-seq (ENCODE ID ENCSR000EVM).
} 
\label{Fig11}
\end{figure*}


\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig2-ARID1B.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ARID1B transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR822CCM; antibody: Bethyl Labs A301-046A);
(B) BPNet predictions over chrM.
} 
\label{Fig2}
\end{figure*}


\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=9.25cm]{Fig50-TFAP2C.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the TFAP2C transcription factor}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM. CETCH-seq; ENCODE ID ENCSR742RUA. 
} 
\label{Fig50}
\end{figure*}


\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig8-C11orf30.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the C11orf30 protein}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) K562 ChIP-seq (ENCODE ID ENCSR350XWY; antibody: Bethyl Labs A300-253A, Lot ID 2);
(B) BPNet predictions over chrM.
}
\label{Fig8b}
\end{figure*}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=18.5cm]{Fig7-ATM.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Evidence for mitochondrial genome occupancy by the ATM protein}. Black and yellow tracks show the forward- and reverse-strand ChIP-seq coverage over chrM.
(A) HepG2 ChIP-seq (ENCODE ID ENCSR859JGF; antibody: Bethyl Labs A300-135A);
(B) BPNet predictions over chrM.
} 
\label{Fig7}
\end{figure*}

\begin{figure*}
\begin{center}
\begin{minipage}[c]{0.60\linewidth}
\includegraphics[width=10cm]{Fig55-summary.png}
\end{minipage}\hfill
\begin{minipage}[c]{0.40\linewidth}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Summary of the available evidence for the physical association of nuclear TFs with the mitochondrial genome.} All TFs with ChIP-seq evidence for chrM occupancy are listed in the ``any'' column. ``BPNet confirmed'' indicates that the observed ChIP-Seq pattern is corroborated in its key aspects by BPNet models. The ``discordant'' TFs are those for which not all available ChIP-seq experiments show chrM peaks.
}
\label{Fig55}
\end{minipage}
\end{center}
\end{figure*}

\subsection*{Nuclear receptors}

Of the 46 human nuclear receptors, 26 have been assayed by ENCODE. 

Figure \ref{Fig48}A shows the chrM CETCH-seq profile for the RXRA TF in the HepG2 cell line. This is another case of the same pattern observed for most of the C2H2 zinc finger TFs, THAP9 and NFKB2, and it too largely matches BPNet predictions (Figure \ref{Fig48}B).  This pattern is not replicated in any of the ChIP-seq datasets available for RXRA in HepG2, GM12878, H1-hESC, liver and SK-N-SH, generated using $\alpha$-RXRA antibodies.

\subsection*{Runt TFs}

Three RUNT TFs are encoded in the human genome. Two of these have been assayed by ENCODE.

Figure \ref{Fig47}A shows the chrM ChIP-seq profile for the RUNX3 TF in the GM12878 cell line. One strong peak is observed over the \textit{MT-ND5} gene, two weaker ones over \textit{MT-CO1}, as well as several other loci of slight enrichment. We were not able to train a high quality BPNet model for this TF (Figure \ref{Fig47}B).

\subsection*{HSF TFs}

ENCODE has assayed three out of eight HSF TFs. 

Figure \ref{Fig30}A shows the chrM ChIP-seq profile for the HSF1 TF in HepG2 cells treated with forskolin + 1\,mM pyruvate. One peak is observed over the \textit{MT-CO2} gene, but it is not corroborated by BPNet predictions (Figure \ref{Fig30}B) and is not replicated in HSF1 ChIP-seq datasets in GM12878 and MCF-7 cells (Figure \ref{Fig30}C--D).

\subsection*{Homeodomain TFs}

Homeodomain transcription factors are the second largest class of TFs in mammalian genomes. The human genome encodes 202 of them, plus seven CUT Homeodomain TFs, 16 POU Homeodomain TFs, and nine Paired-box Homeodomain TFs (treated separately in the classification followed here\cite{Lambert2018}). We observe evidence for mtDNA occupancy for two Homeodomain TFs and one CUT Homeodomain TF.

Figure \ref{Fig41}A shows the chrM ChIP-seq profile for the MEIS2 TF in the K562 cell line. One moderate peak is observed over the \textit{MT-ND1} gene. BPNet predicts elevated signal over that region, but also over many other sites in the mitochondrial genome (Figure \ref{Fig41}B).

Figure \ref{Fig44}A shows the chrM ChIP-seq profile for the PKNOX1 TF in the K562 cell line. The same peak as for MEIS2 is seen, and in this case too BPNet predictions do not match well the observed profile (Figure \ref{Fig44}B). The peak is replicated in HEK293T cells  (Figure \ref{Fig44}D), but not in GM12878  (Figure \ref{Fig44}C) or MCF-7  (Figure \ref{Fig44}E).

Figure \ref{Fig10}A shows the chrM ChIP-seq profile for the CUX1 TF in the MCF-7 cell line. Two strong peaks are observed -- over the 12S rRNA gene and over \textit{MT-ND2}, as well as a weaker one over \textit{MT-ATP6}. BPNet predictions include these peaks but also many others, and their relative predicted and observed strengths do not match well (Figure \ref{Fig10}B). These patterns are replicated in K562 CETCH-seq experiment (Figure \ref{Fig10}D), but not in K562 ChIP-seq and GM12878 experiments carried out with the same $\alpha$-CUX1 antibody (Figure \ref{Fig10}C,E).

\subsection*{E2F TFs}

The E2F family consists of 11 TFs in the human genome, for which data is available for ten. 

Figure \ref{Fig11}A shows the chrM ChIP-seq profile for the E2F1 TF in the K562 cell line. A strong peak is observed over the \textit{MT-ND1} gene, as well as elevated signal around several other loci in the mitochondrial genome. However, this does not match the BPNet-predicted profile (Figure \ref{Fig11}B), and it is also not replicated in any of the other available datasets -- ChIP-seq in K562 cells generated using a different $\alpha$-E2F1 antibody (Figure \ref{Fig11}C), ChIP-seq in MCF-7 cells carried out using HA-tagged E2F1 (Figure \ref{Fig11}D), ChIP-seq in HeLaS3 cells carried out using a third $\alpha$-E2F1 antibody (Figure \ref{Fig11}E), and CETCH-seq experiments in HepG2 and HeLa-S3 cells (Figure \ref{Fig11}F--G).

\subsection*{ARID/BRIGHT TFs}

Eight out of 15 ARID/BRIGHT TFs in the human genome have been assayed by ENCODE. 

Figure \ref{Fig2}A shows the chrM ChIP-seq profile for the ARID1B TF in the K562 cell line. A single peak is observed over the \textit{MT-ATP6} gene, but we do not have BPNet support for it (Figure \ref{Fig2}B), and there are no other datasets available for orthogonal evidence.

\subsection*{AP-2 TFs}

One of the five AP-2 TFs in the human genome has been assayed by ENCODE. 

Figure \ref{Fig50}A shows the chrM profile for the TFAP2C TF in a CETCH-seq experiment in the MCF-7 cell line. Two very strong peaks are observed -- over the \textit{MT-ND1} and \textit{MT-ND4} genes. In this case we were not able to train a good BPNet model, and there are no other experiments available for TFAP2C.

\subsection*{Other proteins}

We did not find any putative chrM peaks for TFs in the following families (Figure \ref{Fig55}): AT hook (8/16 assayed), CBF/NF-Y (1/1), CENPB (2/11), CG-1 (1/2), CSD (2/8), CSL (1/2), CxxC (1/11), EBF1 (1/4), Ets (17/28), GATA (7/11), GTF2I-like (1/4), Grainyhead (2/6), HMG/Sox (18/58), IRF (6/9), MADF (1/3), MADS box (1/6), MBD (4/11), Myb/SANT (16/42), NFX (1/2), Ndt80/PhoG (1/2), Pipsqueak (2/2), p53 (1/3), SAND (5/10), SMAD (10/12), STAT (6/7), T-box (5/17), TBP (2/3), TCR/CxC (1/2), TEA (4/5), BED ZF (3/11), CCCH ZF (8/43), and MYM-type ZF (2/16).

We observed notable chrM peaks for two other chromatin proteins -- C11orf30/EMSY and ATM.

Figure \ref{Fig8b}A shows the chrM ChIP-seq profile for the C11orf30/EMSY protein in the K562 cell line. Two very strong peaks are observed -- over the \textit{MT-CO1} and \textit{MT-ND5} genes. We trained a BPNet model which predicts peaks over a large set of loci, but does include these two peaks (Figure \ref{Fig8b}B). No other datasets are available as orthogonal evidence.

Figure \ref{Fig7}A shows the chrM ChIP-seq profile for the ATM protein in the HepG2 cell line. A peak is observed over the \textit{MT-CO2} gene, but the existing BPNet model does not make high-quality predictions over chrM (Figure \ref{Fig7}B).

\section*{Discussion}

In this work we review the current evidence for mtDNA occupancy by nuclear transcription factors using the vastly expanded collection of ChIP-seq datasets generated by the most recent phases of the ENCODE Project Consortium together with interpretable deep neural network modeling of TF occupancy, continuing from our previous work on the subject a decade ago\cite{Marinov2014,Blumberg2014}. Some evidence for physical association with mtDNA is found for 50 sequence-specific TFs and two other proteins. However, the interpretation of these observations is less straightforward than it was in the past. 

The updated ENCODE collection is qualitatively distinct relative to the much smaller second-phase ENCODE set in that now many TFs have been assayed multiple times, in many different cell lines, and using different combinations of distinct antibodies and/or endogenous epitope tagging. This potentially provides stronger evidence than the more limited data previously available as it can mitigate against the several major concerns that have always existed about observed ChIP-seq peaks over chrM. These are:

\begin{enumerate}
\item Whether the experimental protocol used, specifically the fixation step, might have involved some kind of permeabilization that allows nuclear TFs to ``leak'' into the mitochondrial compartment and occupy mtDNA. This should in principle be unlikely if fixation is carried out directly on intact cells, and it would also be expected to result in ChIP-seq profiles showing elevated signal over most of the cognate sequence motifs present in the mitochondrial genome (rather than the one or just a handful of peaks observed in most cases). Still, concerns about such experimental variation are alleviated if mtDNA occupancy is replicated widely across a large number of datasets generated by different production groups and in different cell lines. On the other hand, the absence of chrM peaks in some of the cell lines assayed might represent true biological variation.
\item Whether non-specific binding by antibodies, especially polyclonal ones, may be the source of the observed ChIP-seq peaks, i.e. the peaks are real and mediated by some unknown protein that localizes to mtDNA, but it is not the TF that is being assayed that is occupying those sites. Observing the same peaks with multiple different antibodies or a combination of an antibody and endogenous epitope tagging greatly boosts confidence in the physical occupancy of mtDNA by a given TF.
\item Whether epitope tagging affects subcellular localization and/or expression levels. Most of the epitope tagging datasets examined here were generated by C-terminal tagging of the endogenous TF gene. This means that subcellular localization and expression levels should not be affected, but nevertheless such concerns cannot be completely dismissed. 
\end{enumerate}

The expanded ENCODE collection provides numerous examples of lack of complete concordance between the different available experiments for each TF. It is thus reasonable to provisionally consider TFs to be likely \textit{in vivo} mtDNA binders if there are at least two orthogonal lines of evidence for their chrM occupancy, i.e. ChIP-seq peaks in datasets generated with at least two different antibodies or an antibody and endogenous tagging, or what is the ideal gold evidential standard -- a combination of peaks observed in ChIP-seq datasets and demonstration of localization to mitochondria using immunogold electron microscopy. The latter is however not entirely possible for e.g. many of the C2H2 ZFs, for which epitope tagging was used for ChIP-seq due to the unavailability of immune reagents; in such cases aberrant localization to mitochondria as a result of the tagging cannot be entirely excluded.

With these considerations in mind, we can summarize the available evidence for mtDNA occupancy as follows.

What immediately stands out in the current data is the large number of bZIP factors for which chrM peaks are observed -- nearly half (19/39) of the ones that have been assayed. This is unlikely to be an artifact as numerous lines of evidence converge onto bZIP factors playing a role in mitochondria, even though the evidence for each individual TF can be contradictory. 
For example, ATF2 chrM peaks are seen with multiple antibodies, but not with all or in all epitope tagging experiments; ATF7 peaks are seen in multiple cell lines, but only with one of two antibodies used; FOSL1, FOSL2 and NFE2L1 peaks are not replicated beyond a single dataset, and CEBPG peaks are observed in one epitope-tagged cell line but not in others. On the other hand, CREB1 and MAFK chrM peaks are replicated with multiple different antibodies and NFE peaks in both ChIP-seq and CETCH-seq (although not in all cell lines). 

The bZIP factors also include the three TFs for which direct microscopy evidence exists for localization to mitochondria -- MAFK, JUN and JUND. 

They also exhibit colocalization to a few distinct sites in the mitochondrial genome, which lends mutual support to each other's mtDNA occupancy because many bZIP factors form heterodimers in the form of the AP-1 transcription factor\cite{Hess2004}. Thus, they would be expected to co-occupy the same sites. AP-1's nuclear functions also happen to be generally associated with the regulation of growth and proliferation; these are processes in which mitochondria play important roles. While it is currently not clear how AP-1 might be playing a regulatory role in mitochondria mechanistically, this is an obvious functional connection to consider.

% ATF2 -- multiple antibodies, but not all antibodies or all tagging
% ATF3 -- only one cell line, not in tagging
% ATF4 -- one cell line, not seen in tagging
% ATF7 -- multiple cell lines, but not with the other antibody
% CREB1 -- replicated by tagging and more than one antibody; still not seen in all cell lines
% FOS -- multple cell ines, but not all; same antibody, different production groups
% FOSL1 -- not replicated 
% FOSL2 -- not replicated
% CEBPB -- replicated, but not in all cell lines
% CEBPG -- one tagged cell line, not replicated in other tagged cell lines
% JUN -- replicated in only one cell line; not in others
% JUND -- replicated in multiple celliens, but not in all K562 datasets
% MAFF -- replicated
% MAFG -- not replicated by CETCH
% MAFK -- replicated with two different antibodies and in most cell lines
% NFE -- K562 Ab and CETCH-seq; not seen in other CETCH and ChIP
% NFE2L1 -- not replicated
% NFE2L2 -- replciated in more than one cell line
% NRL -- only a single experiment

On the other extreme of reliability of the available evidence lies the set of C2H2 ZF TFs (DZIP1, HIVEP1, ZNF225, ZNF263, ZNF274, ZNF280B, ZNF350, ZNF598, ZNF768, ZNF839, ZNF891) together with THAP9, NFKB2 and RXRA. All of these datasets exhibit almost the same ChIP-seq profile, are almost all derived from epitope tagging experiments, and they also show elevated signal over nearly all predicted occupancy sites. These peaks are not replicated by any antibody ChIP-seq datasets where available, and thus they are most likely an artifact, although it is not clear why only these CETCH-seq experiments would generate such an artifact and not the hundreds of others.

% ZNF316 -- not replicated by different antibodiy in the same cell line
% DZIP1, HIVEP1, ZNF225, ZNF263, ZNF274, ZNF280B, ZNF350, ZNF598, ZNF768, ZNF839, ZNF891; nine are CETCH-seq, 2 are ChIP
% THAP9, NFKB2, 
% RXRA -- CETCH-seq, does not replicate by antibodies

Evidence is also currently weak for mtDNA occupancy by BHLHE40, MITF, FOXA1, FOXA2, HSF1, E2F1, due to lack of replication and/or lack of support from BPNet predictions. 

For factors such as MAX, RFX1, RFX5 and PKNOX1 ChIP-seq peaks are seen in multiple cell lines, although not in others. They are provisionally more likely to be truly occupying mtDNA than not.

Yet other factors -- SREBF1, MEIS2, RUNX3, CUX1, ARID1B, TFAP2C, C11orf30/EMSY and ATM -- have only been assayed in a single cell line, and thus the available evidence is simply too limited to say much more about them.

% BHLHE40 -- does not match BPNet; does not replicate
% MAX -- replicated in a few, but not most cell lines
% MITF -- does not match BPNet
% SREBF1 -- no additional data

% RFX1 -- replicated in one cell line, not in the other
% RFX5 -- does not replicate in many cell lines

% FOXA1, FOXA2 -- do not replicate with other antibodies and CETCH-seq, weak peaks

% RUNX3 -- no BPNet or other datasets

% HSF1 -- no BPNet support, no replication

% MEIS2 -- no other datasets
% PKNOX1 -- replicated in one cell line, not in others
% CUX1 -- replicated by CETCH, but not by ChIP

% E2F1 -- no BPNet support; no replication

% ARID1B -- no other data
% TFAP2C -- no other data, no model

% C11orf30/EMSY and ATM -- no other data

Nevertheless, some broad trends emerge. While the bZIP factors appear to be particularly enriched for potential mitochondrial moonlighting, other large and important TF families show very little such evidence. Even if the 12 C2H2 ZFs turn out not to be the result of an experimental artifact, they would represent only a small fraction (12/419) of the huge diversity of such TFs. Other large TF families that, although not exhaustively sampled, don't seem to bind to chrM include HMG/Sox, nuclear receptors, Homeodomain TFs, and Myb/SANT. A few of the smaller TF families have also been almost exhaustively sampled and they too show no evidence for mitochondrial localization. These include GATA, IRF, SMAD, STAT, and TEA.

In summary, our work represents the most comprehensive catalog of human TFs potentially occupying mitochondrial DNA compiled so far, and provides the foundation for the subsequent direct validation and characterization of the possible functions of these factors in mitochondrial gene regulation.

\section*{Methods}

\subsection*{ChIP-seq data processing}

Raw sequencing reads for transcription factor ChIP-seq datasets were downloaded from the ENCODE Consortium Portal\cite{Luo2020} (\burl{https://www.encodeproject.org/}; data current as of May 1st, 2022). Reads were aligned using Bowtie\cite{Langmead2009} (version 1.1.1) as 1$\times$36mers against an index containing the mitochondrial genome, with the following settings: \verb|-v 2 -k 2 -m 1 -t --best --strata|.

The \verb|hg38| version of the \textit{Homo sapiens} genome was used for all analyses.

These initial alignments were used for a fast screen of all available datasets and to generate a list of putative mitochondrial genome-occupying TFs.

% Subsequently, that subset of datasets were realigned against a combined nuclear and mitochondrial genome index in order to evaluate relative signal strength in the two genomes.

% The \verb|hg38|, \verb|mm10|, \verb|ce10| and \verb|dm6| versions of the \textit{Homo sapiens}, \textit{Mus musculus}, \textit{Caenorhabditis elegans} and \textit{Drosophila melanogaster} genomes, respectively, were used.

\subsection*{Mappability track generation}

Mappability was assessed as follows. Sequences of length $N$ bases were generated starting at each position in the mitochondrial genome. The resulting set of ``reads'' was then mapped against the same Bowtie index used for mapping real data. Positions covered by $N$ reads were considered fully mappable. In this case, $N = 36$, as this is the read length for most of the sequencing data analyzed in this study.

\subsection*{Screening for TF occupancy over the mitochondrial genome}

We applied the procedure we had previously devised\cite{Marinov2014}. Briefly, for each dataset, we fit a Gamma distribution over the RPM coverage scores for the bottom $F_b$ percentile of fully mappable positions on the mitochondrial chromosome, then used the estimated parameters to rescale the raw signal over all positions to a $z$-score. Datasets with strong peaks thus receive low $z$-scores over most of the mappable mitochondrial genome, and very high $z$-scores over the regions with highly localized enrichment. A value of $F_b = 0.8$ was used for our analysis. As this procedure is sensitive to datasets with very low total read coverage over the mitochondrial genome, we restricted our analysis to datasets with at least 5,000 uniquely mappable reads, i.e., $\geq 10\times$ coverage. Datasets with high maximum $z$-scores ($z \geq 15$) were then manually examined (using Circos\cite{Krzywinski2009} plots) to screen out potentially artifactual high ChIP signal localization events that do not display the expected asymmetric pattern around true occupancy sites.

\subsection*{BPNet model training and predictions}

BPNet\cite{BPNet} models were trained on available ENCODE ChIP-seq datasets as follows. \hl{XXX DESCRIBE WHAT WAS DONE EXACTLY XXX}.

Trained models were then used to generate predicted occupancy profiles for both strands over the mitochondrial genome by \hl{XXXX}.

% \subsection*{Motif analysis}

% The peak calls for human and mouse ENCODE data available from the UCSC Genome Browser were used to find de novo motifs for transcription factors from ChIP-seq data. The sequence around the peak summit (using a 50bp radius) was retrieved for the top 500 called peaks for each factor in each cell line and motifs were called using the MEME program in the MEME SUITE, version 4.6.1 \cite{Bailey2009}. The MEME-defined position weight matrix was then used to scan the mitochondrial genome for motif matches following the approach described in \cite{Mortazavi2006}.

\section*{Author contributions}

G.K.M. conceived the project and carried out data analysis with supervision from A.K. V.R. trained BPNet models and generated BPNet predictions. A.K. supervised the study. G.K.M. wrote the manuscript with input from all authors.

\section*{Acknowledgments}

The authors would like to thank members of the Kundaje and Greenleaf labs for useful comments and discussions. This work was supported by NIH grants 1UM1HG009436, 1DP2OD022870-01 and 1U01HG009431 to A.K. Fellowship support was also provided by the Stanford School of Medicine Dean's Fellowship (G.K.M.).

% This work was supported by NIH grants (P50HG007735, RO1 HG008140, U19AI057266 and UM1HG009442 to W.J.G., 1UM1HG009436 to W.J.G. and A.K., 1DP2OD022870-01 and 1U01HG009431 to A.K., and HG006827 to C.H.), the Rita Allen Foundation (to W.J.G.), the Baxter Foundation Faculty Scholar Grant, and the Human Frontiers Science Program grant RGY006S (to W.J.G). W.J.G is a Chan Zuckerberg Biohub investigator and acknowledges grants 2017-174468 and 2018-182817 from the Chan Zuckerberg Initiative. 

\begin{thebibliography}{100}

% \section*{References}

\input{references}

\end{thebibliography}

\end{multicols}

\clearpage

\setcounter{table}{0}
\renewcommand{\tablename}{Supplementary Table}
\setcounter{figure}{0}
\renewcommand{\figurename}{Supplementary Figure}

\setcounter{page}{1}
\renewcommand\thepage{{SM }\arabic{page}}

\begin{center}
% {\LARGE \textbf{\begin{spacing}{1.1}XXXX. \\ Supplementary Materials\end{spacing} }}
{\LARGE \textbf{Supplementary Materials}}
\end{center}

% \section*{Supplementary Tables}

\section*{Supplementary Figures}

\begin{figure*}[!ht]
\begin{center}
\includegraphics[width=15cm]{FigS1-mappability.png}
\end{center}
\captionsetup{singlelinecheck=off,justification=justified}
\caption{
{\bf Unique mappability (for 1$\times$36mer reads) of the mitochondrial genome in the combined nuclear plus mitochondrial genomic space}. 
% NOT REPLICATED
} 
\label{FigS1}
\end{figure*}


\end{document}
