# noinspection PyUnresolvedReferences
import debugger
import csv, pandas as pd, sys, warnings
sys.path.append('..')
from get_loop_partners import get_loop_partners
warnings.simplefilter('error', RuntimeWarning)

# Read the fine-mapping regions table (coordinates without paddings) and index
# it by (chrom, start, end). drop=False keeps the three coordinate fields
# available as ordinary columns in addition to forming the index.

region_key_columns = ['region_chrom', 'region_start', 'region_end']
regions = pd.read_table('fine_mapping_library/regions.tsv')
regions = regions.set_index(region_key_columns, drop=False)

# Load A/D/I/K replicate results
#
# For every replicate, read its casTLE results CSV and keep three fields per
# row: '#GeneID', 'casTLE Score' and 'Individual Elements'. The result is
# screen_results[replicate], a DataFrame indexed by '#GeneID'. All values are
# kept as the strings read from the file (no numeric conversion is done here).

replicates = 'A1', 'A2', 'D1', 'D2', 'I1', 'I2', 'K1', 'K2'
screen_results = {}
for replicate in replicates:
    screen_results[replicate] = {'#GeneID': [], 'casTLE Score': [],
                                 'Individual Elements': []}
    # Open inside a with-block so the handle is closed as soon as the file has
    # been consumed (or if parsing raises); newline='' is what the csv module
    # expects so that it can do its own line-ending handling.
    with open(f'/mnt/lab_data/bassik/All_Screen_Data/'
              f'Results/JT/T14_{replicate}_CTCF.csv', newline='') as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader)
        # Position of each wanted column in the header row; raises ValueError
        # if the file lacks one of them.
        column_indices = {column: header.index(column)
                          for column in screen_results[replicate]}
        for row in reader:
            screen_results[replicate]['#GeneID'].append(row[column_indices[
                '#GeneID']])
            screen_results[replicate]['casTLE Score'].append(
                row[column_indices['casTLE Score']])
            # Everything from the 'Individual Elements' column to the end of
            # the row is rejoined with ';' (presumably this field spills over
            # several CSV cells -- confirm against the file format).
            screen_results[replicate]['Individual Elements'].append(
                ';'.join(row[column_indices['Individual Elements']:]))
    screen_results[replicate] = pd.DataFrame(screen_results[replicate])
    screen_results[replicate].set_index('#GeneID', inplace=True)

# For each region labelled 'positive', collect the A/D/I/K replicate casTLE
# scores and guide effects together with the locations of all loop partners,
# then write the resulting table to a gzip-compressed TSV.

positives = regions[regions.region == 'positive']
score_columns = [f'{replicate} casTLE score' for replicate in replicates]
effect_columns = [f'{replicate} guide effects' for replicate in replicates]
output = pd.DataFrame(
    index=positives.index,
    columns=['chrom', 'start', 'end', 'Loop partners'] +
            score_columns + effect_columns)
for candidate in positives.itertuples():
    row_label = candidate.Index
    chrom = candidate.region_chrom
    start = candidate.region_start
    end = candidate.region_end
    motifs, ChIP_peaks = get_loop_partners(chrom, start, end, padding=500)
    # Row label used in the screen result tables: upper-cased chromosome, then
    # start and end, separated by ';'.
    screen_key = f'{chrom.upper()};{start};{end}'
    output.loc[row_label, 'chrom'] = chrom
    output.loc[row_label, 'start'] = start
    output.loc[row_label, 'end'] = end
    partners = motifs + ChIP_peaks
    output.loc[row_label, 'Loop partners'] = ';'.join(map(str, partners))
    for replicate in replicates:
        replicate_results = screen_results[replicate]
        output.loc[row_label, f'{replicate} casTLE score'] = \
            replicate_results.loc[screen_key, 'casTLE Score']
        output.loc[row_label, f'{replicate} guide effects'] = \
            replicate_results.loc[screen_key, 'Individual Elements']
output.to_csv('crop_seq_candidates.tsv.gz', sep='\t', compression='gzip')