ó
ø-$Ic           @   sŠ   d  Z  y
 e Z Wn! e k
 r3 d d l m Z n Xd d l Z d d
 d „  ƒ  YZ d „  Z d d „ Z	 e d d „ Z
 e d d	 „ Z d S(   s¼  
This module provides code for doing k-nearest-neighbors classification.

k Nearest Neighbors is a supervised learning algorithm that classifies
a new observation based the classes in its surrounding neighborhood.

Glossary:
distance   The distance between two points in the feature space.
weight     The importance given to each point for classification. 


Classes:
kNN           Holds information for a nearest neighbors classifier.


Functions:
train        Train a new kNN classifier.
calculate    Calculate the probabilities of each class, given an observation.
classify     Classify an observation into a class.

    Weighting Functions:
equal_weight    Every example is given a weight of 1.

iÿÿÿÿ(   t   SetNt   kNNc           B   s   e  Z d  Z d „  Z RS(   s  Holds information necessary to do nearest neighbors classification.

    Members:
    classes  Set of the possible classes.
    xs       List of the neighbors.
    ys       List of the classes that the neighbors belong to.
    k        Number of neighbors to look at.

    c         C   s+   t  ƒ  |  _ g  |  _ g  |  _ d |  _ d S(   s   kNN()N(   t   sett   classest   xst   yst   Nonet   k(   t   self(    (    sz   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/kNN.pyt   __init__.   s    		(   t   __name__t
   __module__t   __doc__R	   (    (    (    sz   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/kNN.pyR   $   s   	c         C   s   d S(   s   equal_weight(x, y) -> 1i   (    (   t   xt   y(    (    sz   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/kNN.pyt   equal_weight5   s    c         C   sC   t  ƒ  } t | ƒ | _ t j |  | ƒ | _ | | _ | | _ | S(   sN  train(xs, ys, k) -> kNN
    
    Train a k nearest neighbors classifier on a training set.  xs is a
    list of observations and ys is a list of the class assignments.
    Thus, xs and ys should contain the same number of elements.  k is
    the number of neighbors that should be examined when doing the
    classification.
    
    (   R   R   R   t   numpyt   asarrayR   R   R   (   R   R   R   t   typecodet   knn(    (    sz   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/kNN.pyt   train:   s    
			c         C   s[  t  j | ƒ } g  } | rg x¾ t t |  j ƒ ƒ D]/ } | | |  j | ƒ } | j | | f ƒ q1 Wnu t  j t | ƒ ƒ } x] t t |  j ƒ ƒ D]F } | |  j | | (t  j t  j | | ƒ ƒ } | j | | f ƒ q’ W| j	 ƒ  i  } x |  j
 D] }	 d | |	 <qö WxJ | |  j  D]; \ } } |  j | }
 | |
 | | |  j | ƒ | |
 <qW| S(   s  calculate(knn, x[, weight_fn][, distance_fn]) -> weight dict

    Calculate the probability for each class.  knn is a kNN object.  x
    is the observed data.  weight_fn is an optional function that
    takes x and a training example, and returns a weight.  distance_fn
    is an optional function that takes two points and returns the
    distance between them.  If distance_fn is None (the default), the
    Euclidean distance is used.  Returns a dictionary of the class to
    the weight given to the class.
    
    g        (   R   R   t   ranget   lenR   t   appendt   zerost   sqrtt   dott   sortR   R   R   (   R   R   t	   weight_fnt   distance_fnt   ordert   it   distt   tempt   weightsR   t   klass(    (    sz   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/kNN.pyt	   calculateK   s&    
&c   	      C   so   t  |  | d | d | ƒ} d } d } xA | j ƒ  D]3 \ } } | d k sX | | k r4 | } | } q4 q4 W| S(   s]  classify(knn, x[, weight_fn][, distance_fn]) -> class

    Classify an observation into a class.  If not specified, weight_fn will
    give all neighbors equal weight.  distance_fn is an optional function
    that takes two points and returns the distance between them.  If
    distance_fn is None (the default), the Euclidean distance is used.
    R   R   N(   R$   R   t   items(	   R   R   R   R   R"   t
   most_classt   most_weightR#   t   weight(    (    sz   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/kNN.pyt   classifys   s    (    (   R   R   t	   NameErrort   setsR    R   R   R   R   R   R$   R)   (    (    (    sz   /oak/stanford/groups/akundaje/marinovg/programs/biopython-1.50.tar.gz/biopython-1.50/build/lib.linux-x86_64-2.7/Bio/kNN.pyt   <module>   s   
	(