o
    8iG                  
   @   s   d Z ddlZddlZddlZddlmZ ddlmZm	Z	m
Z
 dedededee fd	d
Zdededededef
ddZdedefddZdedededefddZdd Zedkr_e  dS dS )z
Generate an HTML summary page for a variant that displays all plots in an easily-readable format.

Usage:
    python generate_variant_summary_html.py <variant_dir> <variant_id> <clustered_tsv> <model_dataset> <output_html>
    N)Path)ListOptionalDictclustered_tsv
variant_idmodel_datasetreturnc              
   C   s  |   std|   g S z	tj| dd}W n ty1 } ztd|  g W  Y d}~S d}~ww ||d |k }|jrGtd| d g S g }d	| }||jv r|| jd
 }t|rt	|
 rdd t	|dD }	|	D ]}
d|
v r|
dd
 }|r|| qs|s|jD ]<}|d	r|| jd
 }t|rt	|
 rdd t	|dD }	|	D ]}
d|
v r|
dd
 }|r|| qqtt|S )a
  Get prioritized models for a variant from clustered.tsv.
    
    Reads the models_prioritized_by_any-{model_dataset} column and parses
    the semicolon-separated format to extract model names.
    
    Parameters:
    -----------
    clustered_tsv : Path
        Path to clustered.tsv file
    variant_id : str
        Variant ID to look up
    model_dataset : str
        Model dataset name (e.g., "Fetal Brain")
    
    Returns:
    --------
    List[str]
        List of model names that prioritize this variant
    z'Warning: clustered.tsv does not exist: 	)sepzError reading clustered.tsv: Nr   zWarning: Variant z not found in clustered.tsvzmodels_prioritized_by_any-r   c                 S      g | ]
}|  r|  qS  strip.0mr   r   *snakemake/generate_variant_summary_html.py
<listcomp><       z3get_prioritized_models_from_tsv.<locals>.<listcomp>;(c                 S   r   r   r   r   r   r   r   r   J   r   )existsprintpdread_csv	Exceptionemptycolumnsilocnotnastrr   splitappend
startswithlistset)r   r   r   dfevariant_rowmodelsaggregated_col
models_strmodel_entriesentrymodelcolr   r   r   get_prioritized_models_from_tsv   sN   





r1   variant_dirc                 C   sd   | d | d | d | d g d}t |||}|D ]}| d| d }|d ||| d	 q|S )
zFind all expected plot paths for a variant, regardless of whether they exist.
    
    Returns:
        dict with keys: 'barplot', 'barplot_superset', 'scatterplot', 'scatterplot_superset', 'profiles' (list)
     01-model-specificity-barplot.png)01-model-specificity-barplot-superset.png02-model-scatterplot.html"02-model-scatterplot-superset.htmlbarplotbarplot_supersetscatterplotscatterplot_supersetprofiles03-profile-z.pngr<   )pathr/   r   )r1   r#   r   )r2   r   r   r   plotsprioritized_modelsr/   profile_pathr   r   r   find_expected_plotsT   s   	
rB   c                 C   s   ddddg d}| d }|  r||d< | d }|  r ||d< | d }|  r,||d< | d	 }|  r8||d
< t| dD ]}|jdd}|d ||d q?|S )zFind all plot files in the variant directory.
    
    Returns:
        dict with keys: 'barplot', 'barplot_superset', 'scatterplot', 'scatterplot_superset', 'profiles' (list)
    Nr7   r3   r8   r4   r9   r5   r:   r6   r;   z03-profile-*.pngr=    r<   )r>   r/   )r   sortedglobstemreplacer#   )r2   r?   barplot_pathbarplot_superset_pathscatterplot_pathscatterplot_superset_pathprofile_file
model_namer   r   r   
find_plotsp   s2   	
rN   output_htmlr?   c              	      s  |j  dtdtf fdd}d| d| d}|d7 }|d	7 }|d
}|rIt|tr<| r<||}|d| d7 }n||}|d| d7 }n|d7 }|d7 }|d7 }|d7 }|d}|rt|trv| rv||}|d| d7 }n||}|d| d7 }n|d7 }|d7 }|d7 }|d7 }|d}	|	rt|	tr|	 r||	}|d| d7 }n||	}|d| d7 }n|d7 }|d7 }|d7 }|d7 }|d}
|
rt|
tr|
 r||
}|d| d7 }n||
}|d| d7 }n|d7 }|d7 }|d7 }|d 7 }|d! rT|d"7 }|d! D ]:}|d# }||}|d$d%}|r;|d&|d'  d(| d)|d'  d*7 }q|d&|d'  d(| d)|d'  d+7 }q|d,7 }n|d-7 }|d7 }|d.7 }|j jd/d/d0 t|d1}|| W d2   n	1 s~w   Y  t	d3|  d2S )4z+Generate HTML summary page for the variant.	file_pathr	   c                    s,   zt j|  W S  ty   t|  Y S w )z/Get relative path from output HTML to the file.)osr>   relpath
ValueErrorr!   )rP   
output_dirr   r   get_relative_path   s
   z(generate_html.<locals>.get_relative_pathz<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Variant Summary: al
  </title>
    <style>
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            line-height: 1.6;
            color: #333;
            max-width: 1400px;
            margin: 0 auto;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .header {
            background-color: #2c3e50;
            color: white;
            padding: 20px;
            border-radius: 8px;
            margin-bottom: 30px;
        }
        .header h1 {
            margin: 0;
            font-size: 24px;
        }
        .header .variant-id {
            font-family: 'Courier New', monospace;
            font-size: 18px;
            margin-top: 10px;
            opacity: 0.9;
        }
        .section {
            background-color: white;
            padding: 25px;
            margin-bottom: 25px;
            border-radius: 8px;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
        .section h2 {
            margin-top: 0;
            color: #2c3e50;
            border-bottom: 2px solid #3498db;
            padding-bottom: 10px;
        }
        .plot-container {
            margin: 20px 0;
            text-align: center;
        }
        .plot-container img {
            max-width: 100%;
            height: auto;
            border: 1px solid #ddd;
            border-radius: 4px;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        }
        .plot-container iframe {
            width: 100%;
            height: 600px;
            border: 1px solid #ddd;
            border-radius: 4px;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        }
        .profile-grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(500px, 1fr));
            gap: 20px;
            margin-top: 20px;
        }
        .profile-item {
            background-color: #f8f9fa;
            padding: 15px;
            border-radius: 4px;
            border: 1px solid #e0e0e0;
        }
        .profile-item h3 {
            margin-top: 0;
            color: #2c3e50;
            font-size: 16px;
        }
        .profile-item img {
            width: 100%;
            height: auto;
        }
        .no-plots {
            color: #999;
            font-style: italic;
            text-align: center;
            padding: 40px;
        }
        .plot-label {
            font-weight: 600;
            color: #555;
            margin-bottom: 10px;
            font-size: 14px;
        }
    </style>
</head>
<body>
    <div class="header">
        <h1>Variant Summary</h1>
        <div class="variant-id">z</div>
    </div>
z    <div class="section">
z;        <h2>Model Specificity Barplot (Cluster-level)</h2>
r8   z        <div class="plot-container">
            <div class="plot-label">Model prioritization across tissues/organs in the current cluster/model dataset</div>
            <img src="z2" alt="Model Specificity Barplot">
        </div>
a  " alt="Model Specificity Barplot" onerror="this.style.display='none'; this.nextElementSibling.style.display='block';">
            <div style="display:none; color: #999; font-style: italic; padding: 20px;">Plot will be available after generation</div>
        </div>
zG        <div class="no-plots">No cluster-level barplot available</div>
z    </div>

z<        <h2>Model Specificity Barplot (Superset-level)</h2>
r9   z        <div class="plot-container">
            <div class="plot-label">Model prioritization across tissues/organs in the superset (broader context)</div>
            <img src="z=" alt="Model Specificity Barplot (Superset)">
        </div>
a  " alt="Model Specificity Barplot (Superset)" onerror="this.style.display='none'; this.nextElementSibling.style.display='block';">
            <div style="display:none; color: #999; font-style: italic; padding: 20px;">Plot will be available after generation</div>
        </div>
zH        <div class="no-plots">No superset-level barplot available</div>
z3        <h2>Model Scatterplot (Cluster-level)</h2>
r:   z        <div class="plot-container">
            <div class="plot-label">Interactive scatterplot showing variant scores across models in the current cluster/model dataset</div>
            <iframe src="z+" frameborder="0"></iframe>
        </div>
a  " frameborder="0" onerror="this.style.display='none'; this.nextElementSibling.style.display='block';"></iframe>
            <div style="display:none; color: #999; font-style: italic; padding: 20px;">Plot will be available after generation</div>
        </div>
zK        <div class="no-plots">No cluster-level scatterplot available</div>
z4        <h2>Model Scatterplot (Superset-level)</h2>
r;   z        <div class="plot-container">
            <div class="plot-label">Interactive scatterplot showing variant scores across all models in the superset (broader context)</div>
            <iframe src="zL        <div class="no-plots">No superset-level scatterplot available</div>
z        <h2>Profile Plots</h2>
r<   z#        <div class="profile-grid">
r>   r   Fz;            <div class="profile-item">
                <h3>r/   z </h3>
                <img src="z" alt="Profile plot for z">
            </div>
z" onerror="this.style.display='none'; this.nextElementSibling.style.display='block';">
                <div style="display:none; color: #999; font-style: italic; padding: 20px;">Plot will be available after generation</div>
            </div>
z        </div>
z?        <div class="no-plots">No profile plots available</div>
z</body>
</html>
Tparentsexist_okwNu   ✓ Generated HTML summary: )
parentr   r!   get
isinstancer   mkdiropenwriter   )r2   r   rO   r?   rV   html_contentrH   rel_pathrI   rJ   rK   profilerA   r   fr   rT   r   generate_html   s   fk





re   c                  C   s  t tjdkrtd td ttjd } tjd }ttjd }tjd }ttjd }| jddd	 t| |||}t| ||| t |d
 }td| d |d
 D ]}|	ddr`dnd}td|d  d|  qVt
|	dto||d  }	t
|	dto|d  }
t
|	dto|d  }t
|	dto|d  }td|	rdnd  td|
rdnd  td|rdnd  td|rdnd  d S )N   zwUsage: python generate_variant_summary_html.py <variant_dir> <variant_id> <clustered_tsv> <model_dataset> <output_html>               TrW   r<   zSummary: Expected z profile plot(s)r   Fu   ✓z	(pending)z  - r/   z: r8   r9   r:   r;   z  - Barplot: z  - Barplot (superset): z  - Scatterplot: z  - Scatterplot (superset): )lensysargvr   exitr   r^   rB   re   r\   r]   r   )r2   r   r   r   rO   r?   num_profilesrc   statusbarplot_existsbarplot_superset_existsscatterplot_existsscatterplot_superset_existsr   r   r   main  s0   


rv   __main__)__doc__rm   rQ   pandasr   pathlibr   typingr   r   r   r!   r1   dictrB   rN   re   rv   __name__r   r   r   r   <module>   s    D.  &
