##################################
#                                #
# Last modified 2018/03/27       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
# import scipy.stats
import numpy as np
import math
import random
from sets import Set
import time

def _simulate_wells(n_first_barcodes, n_wells, cells_per_well, poisson_loading):
    """Simulate one second-round split and tally unique and duplicated cells.

    Every cell placed in a well carries a first-round barcode drawn uniformly
    from ``range(n_first_barcodes)``.  Cells in the same well that share a
    first-round barcode cannot be told apart, so each well contributes only
    its number of distinct barcodes to the apparent cell count.

    Args:
        n_first_barcodes: number of first-round barcodes to draw from.
        n_wells: number of second-round barcodes (wells).
        cells_per_well: cells placed in each well, or the Poisson mean of
            that number when ``poisson_loading`` is true.
        poisson_loading: draw each well's cell count from a Poisson
            distribution instead of using ``cells_per_well`` exactly.

    Returns:
        A ``(total_apparent_cells, duplicates)`` tuple of integers.
    """
    total_apparent_cells = 0
    duplicates = 0
    for _well in range(n_wells):
        if poisson_loading:
            n_cells = int(np.random.poisson(cells_per_well))
        else:
            n_cells = cells_per_well
        n_unique = len(set(random.randrange(n_first_barcodes)
                           for _cell in range(n_cells)))
        total_apparent_cells += n_unique
        # Count collisions against the number of cells actually loaded into
        # this well; under Poisson loading that differs from cells_per_well.
        duplicates += n_cells - n_unique
    return total_apparent_cells, duplicates


def run():
    """Simulate barcode collisions for two-round combinatorial indexing.

    Command-line arguments (read from ``sys.argv``):
        FirstRoundBarcodes   number of first-round barcodes (int)
        SecondRoundBarcodes  number of second-round barcodes / wells (int)
        step_size            increment for the cells-per-well sweep (int)
        outfile              path of the tab-separated output file
        -Round2Poisson       optional flag; draw the number of cells in each
                             well from a Poisson distribution

    For every cells-per-well value in ``range(1, FirstRoundBarcodes,
    step_size)`` one line is written to ``outfile`` (and echoed to stdout,
    prefixed with the two barcode counts) holding the cells-per-well value,
    the total number of apparent (distinguishable) cells and the ratio of
    duplicated cells to apparent cells.

    Exits with status 1 after printing a usage message when fewer than the
    four required positional arguments are supplied.
    """
    # The outfile is sys.argv[4], so at least five argv entries are required.
    if len(sys.argv) < 5:
        print('usage: python %s FirstRoundBarcodes SecondRoundBarcodes step_size outfile [-Round2Poisson]' % sys.argv[0])
        sys.exit(1)

    BC1 = int(sys.argv[1])
    BC2 = int(sys.argv[2])
    step = int(sys.argv[3])
    outfilename = sys.argv[4]

    doPoissonR2 = '-Round2Poisson' in sys.argv
    if doPoissonR2:
        print('will simulare Poisson loading for the second round')

    # The context manager closes the file even if the simulation raises.
    with open(outfilename, 'w') as outfile:
        outfile.write('#Cells_to_be_sorted\tTotal_apparent_cells\tduplicates_fraction' + '\n')

        for i in range(1, BC1, step):
            TotalCells, Duplicates = _simulate_wells(BC1, BC2, i, doPoissonR2)
            # No apparent cells (no wells, or every Poisson draw was zero)
            # means no duplicates either; report 0.0 rather than divide by 0.
            if TotalCells > 0:
                fraction = float(Duplicates) / TotalCells
            else:
                fraction = 0.0
            outline = str(i) + '\t' + str(TotalCells) + '\t' + str(fraction)
            print('%d %d %s' % (BC1, BC2, outline))
            outfile.write(outline + '\n')

# Run the simulation only when executed as a script, so that importing this
# module does not parse sys.argv or call sys.exit as a side effect.
if __name__ == '__main__':
    run()