########################################
# The contents of this file are subject to the MLX PUBLIC LICENSE version
# 1.0 (the "License"); you may not use this file except in
# compliance with the License.
# 
# Software distributed under the License is distributed on an "AS IS"
# basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
# the License for the specific language governing rights and limitations
# under the License.
# 
# The Original Source Code is "compClust", released 2003 September 03.
# 
# The Original Source Code was developed by the California Institute of
# Technology (Caltech).  Portions created by Caltech are Copyright (C)
# 2002-2003 California Institute of Technology. All Rights Reserved.
########################################
#
# Written By:  Lucas Scharenbroich
# Date      :  June 2001
# 
# Purpose   :  Supports the xclust GTR format in an in-memory structure.
#              Methods allow for agglomeration of the gene data
#

"""
An extension of the BinaryTree class for use by XClust.

The XClustTree is a binary tree with added methods to allow for the direct
reading of XClust-produced GTR files.  See the XClust documentation for more
details.
"""

import sys
import Numeric
import string
import re

from BinaryTree import *
from Node import *

class XClustTree(BinaryTree):

    def read(self, GTRdata):
        """ read(GTRdata): Builds an xclust tree off of this node from
                           a GTR file and returns the number of nodes
                           processed
        """

        # process the GTR file

        nodeDict = {}
        for line in open(GTRdata, 'r').readlines():
            tokens = string.split(line, '\t')
            nodeDict[tokens[0]] = tokens[1:]

        #
        # start at the root and create the Tree
        #

        self.setRoot(self.__createTree(nodeDict))
        
        return len(nodeDict.keys())

    def __createTree(self, nodeDict): 
        """
        an internal function which builds a tree from a node dictionary
        """

        #
        # Create a new node for everything in the node dictionary

        for node in nodeDict.keys():
            data = nodeDict[node][2]

            newNode = Node(node, data)
            MultiWayTree.insert(self, newNode)

            #
            # If the child nodes are Genes, enter them in explicitly

            left  = nodeDict[node][0]
            right = nodeDict[node][1]

            if left[0] == 'G' or left[0] == 'A':
                newNode = SimpleNode(left)
                MultiWayTree.insert(self, newNode)
                
            if right[0] == 'G' or right[0] == 'A':
                newNode = SimpleNode(right)
                MultiWayTree.insert(self, newNode)

        #
        # Iterate over the set of XClustTreeNodes and join them as needed

        for node in nodeDict.keys():

            if node[0] == 'G' or node[0] == 'A':
                continue
            
            #
            # Get the left and right subtrees of this node
            
            left  = nodeDict[node][0]
            right = nodeDict[node][1]

            #
            # Now find them in the cluster of nodes and link them in

            self.join(node, left, right)

        #
        # The complete tree is built, so start at the last node and run
        # up the tree until we hit the root, i.e. link[parent] == None

        root = node
        while (self.parent(root) != None):
            root = self.parent(root)

        return root
    


