#			 -*- Mode: Darwin -*-
# CodonTools -- Tools for manipulating Codons
# Author          : Gina CANNAROZZI
# Created On      : 2 Aug 2001

# Summary of What is Available for conversions
#
#AToInt convert a 1 letter amino acid code to an integer
#AAAToInt convert a 3 letter amino acid code to an integer
#AminoToInt convert a amino acid name to an integer
#BToInt (old NToInt) convert a 1 letter nucleic acid code to an integer
#BBBToInt (old NucToInt) convert a 3 letter nucleic acid code to an integer
#BaseToInt (old NucleicToInt) convert nucleic acid name to an integer
#IntToA, IntToAAA, IntToAmino, IntToB (IntToN), IntToBBB(IntToNuc), 
#IntToBase (IntToNucleic)

#CodonToA (GenCode) convert a codon into the corresponding amino acid 1 letter code
#CodonToInt (GenCodeToInt) convert a codon into the corresponding amino acid integer
#
#CodonToCInt (NucToCode) a'convert a 3-letter codon into a integer from 0 to 64 
#CIntToCodon (CodeToNuc) convert an integer from 1 to 64 into 3-letter codon
#
#AToCodon (AToGenCode)-  convert a 1 letter amino acid code to a list of codons [codon 1, codon 2, ... ]
#IntToCodn (IntToGenCode)-  convert a 1 letter amino acid code to a list of codons [codon int 1, codon int 2, ... ]
#
#CIntToInt (CodonToInt)- convert a number from 1 to 64 to the corresponding amino acid integer
#CIntToA (CodonToA)- convert a number from 1 to 64 to the corresponding amino acid  1 letter code
#CIntToAAA (CodonToAAA)- convert a number from 1 to 64 to the corresponding amino acid 3 letter code
#CIntToAmino (CodonToAmino)- convert a number from 1 to 64 to the corresponding amino acid name
#AToCInt amino acid 1 letter code to codon integer
#IntToCInt  amino acid integer to codon integer

#
# AToCInt: convert 1 letter amino acid to a codon integer from 1..64
#
AToCInt_array := [
[37, 38, 39, 40], [9, 11, 25, 26, 27, 28],
[2, 4], [34, 36], [58, 60], [17, 19],
[33, 35], [41, 42, 43, 44], [18, 20],
[13, 14, 16], [29, 30, 31, 32, 61, 63],
[1, 3], [15], [62, 64], [21, 22, 23, 24],
[10, 12, 53, 54, 55, 56], [5, 6, 7, 8],
[59], [50, 52], [45, 46, 47, 48] ]:

AToCInt := proc(AA:string)
  #AToInt returns 0 to 21- 0 for error, 21 for X
  if AA = '$' then return([49,51,57]) fi:
  aa := AToInt(uppercase(AA)):
  if aa = 0 or aa > 21 then error('not a valid amino acid abbreviation')
  elif aa = 21 then return([])
  else AToCInt_array[aa] fi:
end:

#
# IntToCInt: convert amino acid integer to a codon integer from 1..64
#

IntToCInt := proc(AA:{posint,'$'})
  #AToInt returns 0 to 21- 0 for error, 21 for X
  if AA = '$' or AA = 22 then return([49,51,57]) fi:
  if AA = 0 or AA > 22 then error('not a valid amino acid ')
  elif AA = 21 then return([])
  else AToCInt_array[AA] fi:
end:


AToCodon_array := [
['GCA', 'GCC', 'GCG', 'GCT'], ['AGA', 'AGG', 'CGA', 'CGC', 'CGG', 'CGT'],
['AAC', 'AAT'], ['GAC', 'GAT'], ['TGC', 'TGT'], ['CAA', 'CAG'],
['GAA', 'GAG'], ['GGA', 'GGC', 'GGG', 'GGT'], ['CAC', 'CAT'],
['ATA', 'ATC', 'ATT'], ['CTA', 'CTC', 'CTG', 'CTT', 'TTA', 'TTG'],
['AAA', 'AAG'], ['ATG'], ['TTC', 'TTT'], ['CCA', 'CCC', 'CCG', 'CCT'],
['AGC', 'AGT', 'TCA', 'TCC', 'TCG', 'TCT'], ['ACA', 'ACC', 'ACG', 'ACT'],
['TGG'], ['TAC', 'TAT'], ['GTA', 'GTC', 'GTG', 'GTT'] ]:

#
# AToCodon: convert 1 letter amino acid to 3 letter codon list
#
AToCodon := proc(AA:string) 
  if AA = '$' then return([TAA,TAG,TGA]) fi:

  aa := AToInt(uppercase(AA)):

  if aa = 0 then error('not a valid amino acid code') 
  elif aa = 21 then return([]) 
  else AToCodon_array[aa] fi:

end:

#
# IntToCodon: convert amino acid integer to 3 letter codon list
#
IntToCodon := proc(AA:integer) 

  if AA <= 0 or AA > 22 or AA=21 then error('not a valid amino acid code') 
  elif AA = 22 then return([TAA,TAG,TGA]) 
  else AToCodon_array[AA] fi:

end:

MakeOrderedCodonList := proc()

  printf('codon_list := ['):
  for b1 in [A,C,G,T] do
    for b2 in [A,C,G,T] do
      for b3 in [A,C,G,T] do
        printf('''%s'',',b1.b2.b3):
      od:
    od:
  od:
  printf('];'):

end:


#
# CodonToCInt (NucToCode): convert codon to codon position
#
CodonToCInt_list := [-100,0,1,2,3,3,-100];
NucToCode := proc( bbb:string )
  if length(bbb) < 3 then 0
  else max( 0,
	16 * CodonToCInt_list[BToInt(bbb[1])+1] +
	4 * CodonToCInt_list[BToInt(bbb[2])+1] +
	CodonToCInt_list[BToInt(bbb[3])+1] + 1 ) fi
end:
CodonToCInt := eval(NucToCode):

#
# CIntToCodon (CodeToNuc): convert codon position to codon
#
CodeToNuc := proc (pos: posint)
  if pos > 64 then
    error ('Invalid codon position')
  fi;
  p1 := pos - 1;
  p3 := mod(p1, 4); p1 := (p1 - p3) / 4;
  p2 := mod(p1, 4); p1 := (p1 - p2) / 4;
  IntToB(p1+1).IntToB(p2+1).IntToB(p3+1)
end:
CIntToCodon := eval(CodeToNuc):


#
# CIntToInt (CodonToInt): convert codon position to amino acid integer
#
# the old name was not retained as it is the new name for something else
CIntToInt_array := [ 
12,  3, 12,  3, 17, 17, 17, 17, 2, 16,  2, 16, 10, 10, 13, 10, 
 6,  9,  6,  9, 15, 15, 15, 15, 2,  2,  2,  2, 11, 11, 11, 11, 
 7,  4,  7,  4,  1,  1,  1,  1, 8,  8,  8,  8, 20, 20, 20, 20, 
22, 19, 22, 19, 16, 16, 16, 16, 22,  5, 18,  5, 11, 14, 11, 14 ]:

CIntToInt := proc(cod:posint)
   if cod <= 64 then CIntToInt_array[cod]
   else error('codon Int too high') fi
end:

#
# CIntToA (CodonToA): convert codon position to one amino acid letter
#
# the old name was not retained as it is the new name for something else
CIntToA := proc (codon:posint) 
   if codon > 64 then error('invalid codon integer value') fi: 
   IntToA(CIntToInt_array[codon]) end:

#
# CIntToAAA (CodonToAAA): convert codon position to three letter amino acid name
#
CodonToAAA := proc(codon:posint) 
   if codon > 64 then error('invalid codon integer value') fi: 
   IntToAAA(CIntToInt_array[codon]) end:
CIntToAAA := eval(CodonToAAA):

#
# CIntToAmino (CodonToAmino): convert codon position to amino acid name
#
CIntToAmino := proc(codon:posint)
   if codon > 64 then error('invalid codon integer value') fi: 
   IntToAmino(CIntToInt_array[codon]) end:
CodonToAmino := eval(CIntToAmino):

# Alternative genetic codes taken from 
#	http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c#SG11
# and edited to preserve the AAs and Start lines
# (in this format it is easier to maintain and more efficient to load)

#  Base1  = TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG
#  Base2  = TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG
#  Base3  = TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG

TranslationCodeSource := [
['1. The Standard Code (transl_table=1)',
 'FFLLSSSSYY$$CC$WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG',
 '---M---------------M---------------M----------------------------'],
['2. The Vertebrate Mitochondrial Code (transl_table=2)',
 'FFLLSSSSYY$$CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS$$VVVVAAAADDEEGGGG',
 '--------------------------------MMMM---------------M------------'],
['3. The Yeast Mitochondrial Code (transl_table=3)',
 'FFLLSSSSYY$$CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG',
 '----------------------------------MM----------------------------'],
['4. The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code (transl_table=4)',
 'FFLLSSSSYY$$CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG',
 '--MM---------------M------------MMMM---------------M------------'],
['5. The Invertebrate Mitochondrial Code (transl_table=5)',
 'FFLLSSSSYY$$CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG',
 '---M----------------------------MMMM---------------M------------'],
['6. The Ciliate, Dasycladacean and Hexamita Nuclear Code (transl_table=6)',
 'FFLLSSSSYYQQCC$WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG',
 '-----------------------------------M----------------------------'],
['7. Table has been deleted (transl_table=7)','',''],
['8. Table has been deleted (transl_table=8)','',''],
['9. The Echinoderm and Flatworm Mitochondrial Code (transl_table=9)',
 'FFLLSSSSYY$$CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG',
 '-----------------------------------M---------------M------------'],
['10. The Euplotid Nuclear Code (transl_table=10)',
 'FFLLSSSSYY$$CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG',
 '-----------------------------------M----------------------------'],
['11. The Bacterial and Plant Plastid Code (transl_table=11)',
 'FFLLSSSSYY$$CC$WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG',
 '---M---------------M------------MMMM---------------M------------'],
['12. The Alternative Yeast Nuclear Code (transl_table=12)',
 'FFLLSSSSYY$$CC$WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG',
 '-------------------M---------------M----------------------------'],
['13. The Ascidian Mitochondrial Code (transl_table=13)',
 'FFLLSSSSYY$$CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG',
 '-----------------------------------M----------------------------'],
['14. The Alternative Flatworm Mitochondrial Code (transl_table=14)',
 'FFLLSSSSYYY$CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG',
 '-----------------------------------M----------------------------'],
['15. Blepharisma Nuclear Code (transl_table=15)',
 'FFLLSSSSYY$QCC$WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG',
 '-----------------------------------M----------------------------'],
['16. Chlorophycean Mitochondrial Code (transl_table=16)',
 'FFLLSSSSYY$LCC$WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG',
 '-----------------------------------M----------------------------'],
['17. Table does not exist (transl_table=17)','',''],
['18. Table does not exist (transl_table=18)','',''],
['19. Table does not exist (transl_table=19)','',''],
['20. Table does not exist (transl_table=20)','',''],
['21. Trematode Mitochondrial Code (transl_table=21)',
 'FFLLSSSSYY$$CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG',
 '-----------------------------------M---------------M------------'],
['22. Scenedesmus obliquus mitochondrial Code (transl_table=22)',
 'FFLLSS$SYY$LCC$WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG',
 '-----------------------------------M----------------------------'],
['23. Thraustochytrium Mitochondrial Code (transl_table=23)',
 'FF$LSSSSYY$$CC$WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG',
 '--------------------------------M--M---------------M------------']]:
AltGenCode := proc( gencode:posint, codon:string, pos:posint )
global AltGenCode_array;
if not assigned(AltGenCode_array) then
     n := length(TranslationCodeSource);
     AltGenCode_array := CreateArray(1..n,1..64):
     for i to n do
	# ['T','C','A','G'] is [4,2,1,3] in integers
	Ti := TranslationCodeSource[i];
	if Ti[2]='' then next fi;
	j := 1;
	for B1 in [4,2,1,3] do for B2 in [4,2,1,3] do for B3 in [4,2,1,3] do
	    k := 16*B1 + 4*B2 + B3 - 20;
	    if Ti[3,j]='-' then AltGenCode_array[i,k] := [Ti[2,j]]
	    else AltGenCode_array[i,k] := [Ti[2,j],Ti[3,j]] fi;
	    j := j+1
	    od od od;
	od;
     fi;
if length(codon) <> 3 then error(codon,'codon must be of length three')
elif gencode > length(AltGenCode_array) then
     error(args,'invalid alternative genetic code') fi;
index := CodonToCInt(uppercase(codon));
if index=0 then ['X']	# it contains an invalid base or X
elif nargs=3 then
     if pos>1 then AltGenCode_array[gencode,index,1..1]
     else AltGenCode_array[gencode,index,2..-1] fi
else AltGenCode_array[gencode,index] fi
end:



Ascii_symbols :=
  ['NULL','SOH','STX','ETX','EOT','ENQ','ACK','BEL','BS','HT','LF',
  'VT','FF','CR','SO','SI','DLE','DC1','DC2','DC3','DC4',
  'NAK','SYN','ETB','CAN','EM','SUB','ESC','FS','GS','RS',
  'US','SP','!','"','#','$','%','&','''','(',
  ')','*','+',',','-','.','/','0','1','2',
  '3','4','5','6','7','8','9',':',';','<',
  '=','>','?','@','A','B','C','D','E','F',
  'G','H','I','J','K','L','M','N','O','P',
  'Q','R','S','T','U','V','W','X','Y','Z',
  '[','\\',']','^','_','`','a','b','c','d',
  'e','f','g','h','i','j','k','l','m','n',
  'o','p','q','r','s','t','u','v','w','x',
  'y','z','{','|','}','~','DEL'];

# IntToAscii is now in the kernel
# AsciiToInt is now in the kernel

# Nici's system for using one character codes for codons
#
#T = 0, C = 1, A = 2, G = 3
#
#ascii code = 16*(codon 1) + 4*(codon 2) + (codon 3) + 48
#
#i.e.
#
#TTT =  48 (ascii '0')
#TTC =  49 (ascii '1')
#...
#GGA = 110 (ascii 'n')
#GGG = 111 (ascii 'o')
#
#I then use (ascii code - 48) as an index into the string
#"FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"

GenCodeToCodonSymbol := proc (cs:string)
   if length(cs) <> 3 then error('length of codon must be 3 characters') fi;
   val := [0,0,0];
   for i to 3 do
    val[i] := SearchArray(cs[i],[T,C,A,G]) -1;
   od;
   asciicode := 16*(val[1]) + 4*(val[2]) + (val[3]) + 48;
   IntToAscii(asciicode);
end:

CodonSymbolToGenCode := proc(str:string)
  if str < '/' or str > 'I' then error('invalid input'); fi;
  ret := '';
  index := [T,C,A,G];

  for i to length(str) do
    num   := AsciiToInt(str)-48;
    rem := num-iquo(num,16)*16;
    one   := index[iquo(num,16)+1];
    two   := index[iquo(rem,4)+1];
    rem := rem-iquo(rem,4)*4;
    three := index[rem+1];
    ret := ret.one.two.three
  od;

end:

CodonSymbolToA := proc(cs:string)
  index := 'FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG';
  aastring := CreateString(length(cs));
  for i to length(cs) do
    aastring[i] := IntToA(CodonToInt(CodonSymbolToGenCode(cs[i])));
  od;
  aastring;
end:

