##################################
#                                #
# Last modified 2023/10/27       # 
#                                #
# Georgi Marinov                 #
#                                # 
##################################

import sys
import string
import math
from sets import Set
import os

def _parse_bound(arg):
    """Parse a min/max command-line bound.

    Returns a ``(value, quantile)`` pair:

    * ``'Inf'``   -> ``('Inf', None)``; the bound is not applied.
    * ``'q-N-M'`` -> ``(None, (N, M))``; the threshold is the M-th of N
      quantiles of the per-line maxima and is resolved later from the data.
    * otherwise   -> ``(float(arg), None)``.
    """
    if arg == 'Inf':
        return arg, None
    if arg.startswith('q-'):
        parts = arg.split('-')
        return None, (float(parts[1]), float(parts[2]))
    return float(arg), None


def _open_input(path):
    """Open *path* for line-by-line reading.

    Returns ``(stream, process)``.  Files ending in .bz2 / .gz are piped
    through the external ``bzip2`` / ``gunzip`` tools; anything else is
    opened directly and ``process`` is None.
    """
    import subprocess

    if path.endswith('.bz2'):
        cmd = ['bzip2', '-cd', '--', path]
    elif path.endswith('.gz'):
        cmd = ['gunzip', '-c', '--', path]
    else:
        return open(path), None
    # An argument list (no shell) keeps file names containing spaces or
    # shell metacharacters from being misinterpreted.
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                            universal_newlines=True)
    return proc.stdout, proc


def _row_max(fields, use_abs):
    """Return the largest value among ``fields[1:]`` (absolute values when
    *use_abs* is set), or None when there is no such column or any of them
    is not a number."""
    try:
        values = [float(field) for field in fields[1:]]
    except ValueError:
        return None
    if not values:
        return None
    if use_abs:
        values = [math.fabs(v) for v in values]
    return max(values)


def _scan_rows(path, split_by, use_abs, comment_sink=None):
    """Yield ``(line, row_max)`` for every parsable data line of *path*.

    Lines starting with '#' are copied to *comment_sink* when one is given
    and are never yielded.  Lines that cannot be parsed are reported on
    stdout and skipped.  The input stream is always closed on exit.
    """
    stream, proc = _open_input(path)
    try:
        seen = 0
        for line in stream:
            seen += 1
            if seen % 1000 == 0:
                print('%d lines processed' % seen)
            if line.startswith('#'):
                if comment_sink is not None:
                    comment_sink.write(line)
                continue
            row_max = _row_max(line.strip().split(split_by), use_abs)
            if row_max is None:
                print('skipping line')
                print(line)
                continue
            yield line, row_max
    finally:
        stream.close()
        if proc is not None and proc.wait() != 0:
            sys.stderr.write('warning: decompressing %s exited with status %d\n'
                             % (path, proc.returncode))


def _quantile(ranked, n, m):
    """Return ``(index, value)`` of the M-th of N quantiles of the sorted
    list *ranked*; the index is clamped so that M == N selects the last
    element instead of running off the end."""
    idx = int(m * (len(ranked) / n))
    idx = min(max(idx, 0), len(ranked) - 1)
    return idx, ranked[idx]


def run():
    """Filter the lines of a delimited text file by their largest value.

    Command line: ``datafilename min max outfilename [-absVal] [-splitby SEP]``

    For every data line, the maximum over all columns after the first is
    computed (of absolute values with ``-absVal``).  The line is written to
    ``outfilename`` unless that maximum is below ``min`` or above ``max``.
    Either bound may be ``Inf`` (not applied) or ``q-N-M`` (the M-th of N
    quantiles of the per-line maxima, which needs an extra pass over the
    input).  Lines starting with '#' are copied through unchanged.
    """
    # argv[4] (the output file) is mandatory, so five entries are needed.
    if len(sys.argv) < 5:
        print('usage: python %s  datafilename min max outfilename [-absVal] [-splitby]' % sys.argv[0])
        print('\tuse "Inf" to indicate greater or smaller than')
        print("\tNote: you can use gzipped and bzipped files, the script will detect those automaticlaly if they end in .gz or .bz2")
        sys.exit(1)

    in_path = sys.argv[1]
    min_v, min_q = _parse_bound(sys.argv[2])
    max_v, max_q = _parse_bound(sys.argv[3])
    use_min = sys.argv[2] != 'Inf'
    use_max = sys.argv[3] != 'Inf'
    out_path = sys.argv[4]

    use_abs = '-absVal' in sys.argv
    if use_abs:
        print('will use absolute values')

    split_by = '\t'
    if '-splitby' in sys.argv:
        pos = sys.argv.index('-splitby') + 1
        if pos >= len(sys.argv):
            sys.stderr.write('-splitby requires a delimiter argument\n')
            sys.exit(1)
        split_by = sys.argv[pos]

    outfile = open(out_path, 'w')
    try:
        if min_q is not None or max_q is not None:
            # First pass: collect the per-line maxima to resolve quantile
            # thresholds.  Comment lines are not written here; the second
            # pass emits them, so they appear only once in the output.
            ranked = sorted(
                row_max for _, row_max in _scan_rows(in_path, split_by, use_abs))
            if not ranked:
                sys.stderr.write('no numeric data lines found in %s\n' % in_path)
                sys.exit(1)
            print('%s %s' % (ranked[0], ranked[-1]))
            report = []
            if min_q is not None:
                idx, min_v = _quantile(ranked, min_q[0], min_q[1])
                report.append('%d %s' % (idx, min_v))
            if max_q is not None:
                idx, max_v = _quantile(ranked, max_q[0], max_q[1])
                report.append('%d %s' % (idx, max_v))
            print(' '.join(report))

        # Filtering pass: bounds are inclusive on both ends.
        for line, row_max in _scan_rows(in_path, split_by, use_abs,
                                        comment_sink=outfile):
            if use_min and row_max < min_v:
                continue
            if use_max and row_max > max_v:
                continue
            outfile.write(line)
    finally:
        outfile.close()

# Script entry point: executes the filter with the arguments in sys.argv.
# NOTE(review): this call is unguarded, so it also runs when the file is imported.
run()

