# Purpose: Various variation indices over multiple alignments
# Author:  Lukas Knecht
# Created: 14 Apr 1993
#
KWIndex := proc (ma: array (string))
  ls := length (ma[1]);
  res := CreateArray (1..ls);
  for i to ls do
    exists := CreateArray (1..21);
    nrof := CreateArray (1..21);
    for j to length (ma) do
      p := AToInt (ma[j,i]);
      if p > 0 then
	exists[p] := 1;
	nrof[p] := nrof[p] + 1
      fi
    od;
    res[i] := sum (exists) / (max (nrof) / sum (nrof))
  od;
  res
end:

PrintIndex := proc (ma: array (string), index: array)
  for i to length (ma[1]) do
    printf ('%4d ', i);
    for j to length (ma) do printf ('%c', ma[j,i]) od;
    for j from 2 to nargs do
      if args[j,i] = UNDEFINED then
	printf ('     n/a')
      elif type (args[j,i], numeric) then
	printf ('%8.2f', args[j,i])
      else		
	printf ('%8s', args[j,i])
      fi
    od;
    printf ('\n')
  od
end:

PlotIndex := proc (index: array, title: string)
  ma := mi := 0;
  for i in index do
    if type (i, numeric) then
      ma := max (ma, i);  mi := min (mi, i)
    fi
  od;
  pts := (ma - mi) / 500;
  n := length (index);
  pd := [LTEXT (0, ma + 20 * pts,title),
	 RTEXT (n/30, 0, '0'),
	 RTEXT (n/30, ma, sprintf ('%.2f', ma)),
	 LINE (n/20 + 0.7, 0, n/20 + n + 0.3, 0)];
  if mi < -20 * pts then
    pd := append (pd, RTEXT (n/30, mi, sprintf ('%.2f', mi)))
  fi;
  for i to n do
    if type (index[i], numeric) then
      x := i + n/20;
      pd := append (pd, LINE (x-0.3, 0, x-0.3, index[i]),
		    LINE(x-0.3,index[i],x+0.3,index[i]),
		    LINE(x+0.3,index[i],x+0.3,0))
    fi;
    if mod (i, 5) = 0 then
      if mod (i, 100) = 0 then
	j := i
      elif mod (i, 10) = 0 then
	j := mod (i, 100)
      else
	j := 5
      fi;
      pd := append (pd, LINE (x, 0, x, -2 * pts),
		    CTEXT (x, - 12 * pts, sprintf ('%d', j)))
    fi
  od;
  DrawPlot (pd)
end:

module external ProbIndex;
ProbIndex := proc (ma: array (string), t: Tree)
  cnt := CreateArray (1..1);
  Mtree := MatrixTree_R (t, 0, cnt);
  ls := length (ma[1]);
  res := CreateArray (1..ls);  
  for i to ls do
    x := ProbIndex_R (i, ma, Mtree);
    res[i] := - log10 (AF * x[1]) - x[2]
  od;
  res
end:

MatrixTree_R := proc (t: Tree, h: numeric, cnt: array(integer,1))
  M := exp (max (h - t[2], 0) * NewLogPAM1);
  if type (t, Leaf) then
    cnt[1] := cnt[1] + 1;
    Leaf (t[1], t[2], cnt[1], M)
  else
    Tree (MatrixTree_R (t[1], t[2], cnt), 
	  t[2], MatrixTree_R (t[3], t[2], cnt), M)
  fi
end:

AllOnes := CreateArray (1..20, 1):

ProbIndex_R := proc (i: posint, ma: array (string), t: Tree)
  global AllOnes;
  if type (t, Leaf) then
    p := AToInt (ma[t[3],i]);
    if p = 0 or p > 20 then
      AllOnes, 0
    else
      t[4,p], 0
    fi
  else
    t1 := ProbIndex_R (i, ma, t[1]);
    t3 := ProbIndex_R (i, ma, t[3]);
    res := zip (t1[1] * t3[1]) * t[4];
    s := sum (res);
    res / s, t1[2] + t3[2] + log10 (s)
  fi
end:
end:

module external ScaleIndex;

ew := [
0.9999999999999970, 0.9827324942195341, 0.9835588673784165, 0.9839900293370912,
0.9850885706265112, 0.9855155780695342, 0.9966272052554127, 0.9868665429859256,
0.9872189289629860, 0.9956293160533884, 0.9908958399791676, 0.9920064792633211,
0.9899999980240459, 0.9947161989513937, 0.9885481872685423, 0.9945065034694072,
0.9887645951649017, 0.9940530419756790, 0.9938397918787809, 0.9893829328160593]:
lambda := zip (ln (ew)):
V := [ [-3.2439377394792690e-01, -2.9550595347167818e-01,
1.1307191854285602e-01, 1.1271368781659409e-02, -1.6372244037601215e-02,
-1.1416075586222010e-01, 1.1784561806583634e-01, 4.7158178808041912e-01,
5.6368760181298860e-01, -4.9069187064998077e-02, -4.4465605237540029e-01,
9.6164705825698429e-02, 3.3987922297227863e-01, 5.3310475237758122e-02,
-4.4722743171682727e-01, -7.2395340120428364e-02, -2.7355961137271001e-01,
-1.1594893825342317e-01, 3.8196609894103270e-02, -4.2072818098228160e-02],
[-2.2047793839636420e-01, -1.0074731284855570e-02, 1.9517666968019494e-02,
7.3677590605023990e-02, -7.9559901949340511e-02, 3.4597289788321511e-01,
1.2859188868701873e-01, -1.9977510706507506e-01, 3.6124894350091719e-01,
6.6195664553253430e-02, 8.9902062704709670e-02, -6.7231239359245887e-01,
5.5540991913736425e-02, -1.8461463920730437e-01, 1.6830261403517205e-01,
-5.9633547611458247e-02, 1.0370065253177891e-02, 1.8018749523087729e-01,
1.5466067253043816e-01, -1.5238168186054618e-01],
[-1.8809976426390029e-01, -5.0433479268754432e-02, 1.0159400084424602e-01,
7.2251461438380959e-02, -1.5190619659257536e-02, 2.3446662224257631e-01,
1.4347695926258780e-01, 4.8281179797283164e-01, -6.3415386284741193e-02,
6.8383536663757807e-02, -1.9145932327320852e-02, 1.5426709928755142e-01,
-5.4322019788233401e-02, -7.2874067551289681e-02, 5.5611569582810627e-01,
-1.9172902153222690e-02, 1.5981697382325113e-01, 1.4383043702057258e-01,
4.5413781209350981e-02, 3.2818151052800071e-01],
[-2.2179482656875479e-01, 7.5146475044656092e-03, -7.8663086851073500e-03,
-1.6677983177143454e-01, -2.5861125986192612e-02, -2.8246569328416188e-01,
2.2775414578527273e-01, -2.9367992908290808e-01, 3.8954689311073065e-01,
9.8601950955855430e-02, 2.9673440631332809e-01, 5.2123190393024277e-01,
-5.2863505935265565e-02, -1.5862154958747096e-01, 2.1857233368057766e-01,
-3.0138303745824865e-02, 4.3551806205442663e-03, 2.1364631704685180e-01,
1.0032611855600804e-01, -1.6638577979971428e-01],
[-7.7847946383936731e-02, -8.9999909173828635e-03, 2.1686739438504584e-04,
-5.1949177813725024e-04, 1.3968902539682924e-03, -2.2234975109367023e-03,
-3.1821250274902999e-02, -5.6701849989038884e-03, -1.7703934787313729e-02,
-3.6085900133527866e-02, 7.8122690110242740e-02, -1.3364482023339482e-02,
-2.3368601493080034e-02, 2.6178222155666719e-01, 9.9886291354973518e-03,
-6.7031226797448262e-01, 6.2924835287981049e-03, 2.1928026054178710e-02,
-1.7065352155104848e-02, -4.2417545895163354e-03],
[-1.5573039667517807e-01, -3.9734861457124890e-02, -3.5905128140368038e-02,
-8.1772779660484474e-01, -1.2889065604078212e-01, 1.0820356766116208e-01,
8.1135519695903741e-02, 9.2429284462111086e-02, -2.0363368362236081e-01,
1.9604866086373136e-02, 5.9574661450452794e-02, -6.1954233687956752e-02,
-2.5730659965048145e-02, -1.0752376036407839e-01, -2.0180843962629155e-01,
-2.3722705641801348e-02, -6.2260805674385225e-03, 8.3634108372660554e-02,
6.0288272836101407e-02, 1.9079498046818549e-02],
[-2.4111996357874529e-01, -2.7964742617810857e-03, 2.7557513817796756e-02,
4.7340481599898943e-01, 5.6188225230979154e-02, 4.0498327044836979e-01,
1.9141367189387273e-01, 7.1452418413232771e-02, -3.3892866103521069e-01,
5.3900839203114476e-02, 2.2871543826947160e-01, 2.6198655327192866e-01,
-1.1136007805673039e-03, -1.9590792103297000e-01, -4.7069409861984951e-01,
-4.7891096151476735e-02, -9.3959304812380073e-02, 1.7670770882324660e-01,
1.2942451154261089e-01, -2.9931843176270501e-01],
[-3.0365285463315517e-01, -7.4046983978736007e-03, 2.5932859056450860e-02,
3.8674585985837570e-03, 9.9499152667707081e-03, -5.1371332809790008e-03,
4.5910662559575199e-01, -6.1117851670612969e-02, -7.3841055180453044e-02,
3.8026683621758883e-01, 1.2519996756972609e-01, -1.0752122079345734e-01,
-8.9902591548321753e-02, 8.0755548003803179e-01, -6.0326397259313274e-03,
6.0519360804976907e-01, 4.3165338400693205e-02, -1.6585591011739945e-01,
-1.4590499327956996e-01, -4.6518374319675461e-02],
[-9.7155467698252312e-02, 9.7670143782104570e-03, -4.6595185782693574e-03,
8.5906579398436170e-02, 3.5008464361553034e-02, -4.1708199832762735e-02,
1.6643414914236914e-02, -9.7076798759957048e-02, 1.0158765188425892e-01,
4.0628489660734256e-02, -4.5158865852346110e-02, -2.3079467952079675e-02,
-3.2751543070806349e-01, -5.4831449782679453e-02, -3.0027972491597121e-01,
-1.7431303862505181e-02, 2.7010267828806095e-01, 1.2650707598906577e-01,
-4.9988636442632073e-02, 3.7857635812799179e-01],
[-2.3971455213363424e-01, -4.8691850682920051e-01, -5.1268844483264786e-01,
7.2172714904137208e-02, -1.7034464708653754e-01, -1.8863636647651889e-04,
-2.6986114990109339e-01, -8.7644595700762957e-03, -5.3498276150408322e-02,
-3.5093669283535772e-01, -4.9036918327695467e-02, 3.5538722823659157e-02,
-2.3282560097657434e-01, 6.1983758923884483e-02, 1.1129622361778274e-01,
1.4253027791129738e-01, 2.9240118931749404e-01, -1.6670754376007652e-01,
2.2603877917309242e-01, -2.4767615707383317e-01],
[-3.8970176087350006e-01, 4.1859709874279200e-02, 5.7309943926478438e-02,
-1.3141009652474937e-02, 5.2983025446878085e-01, 5.1702986430285654e-02,
-4.9339700164938521e-01, -3.7713436788230692e-02, 6.0263450334672987e-02,
-5.1388235058183873e-01, 4.5490633205481523e-01, -3.4920366731361591e-02,
-6.6947029750105191e-02, 9.0237265638953484e-02, 1.4639671022630250e-02,
2.9153509519530868e-01, -6.0789178168613756e-01, -1.9776611461947921e-01,
2.2312984629186219e-01, 5.0410149954416594e-01],
[-2.4296564829928979e-01, 3.8036873783285706e-02, -3.6318048734572068e-02,
1.7078227745070151e-01, 2.0402802292175384e-01, -7.3349974596525047e-01,
1.4914280032827351e-01, 2.2461698258268320e-01, -3.0977482924658611e-01,
3.6730704565731284e-02, 7.3504963546319829e-02, -3.6956146737532136e-01,
5.1944831952803458e-02, -1.9806862459024102e-01, 2.5706931850890593e-02,
-4.9698218011336059e-02, 1.8246278043360675e-03, 1.7138344529392488e-01,
1.4411306369781476e-01, -7.8288457125525054e-02],
[-9.4204257756041784e-02, 1.0951082820266152e-01, 1.2691546134134077e-01,
9.7812937504766287e-02, -7.3123157977285669e-01, -8.1988730998010129e-02,
-9.0989761191817800e-02, -2.3647545459555140e-03, -3.5266151132236710e-02,
-1.0113354685757386e-01, 4.7046251494323621e-02, -4.9091062651286897e-03,
-1.6287547309825478e-03, 1.6656517655477522e-02, -1.9370800445445618e-02,
4.8260733289731371e-02, -6.4702211212265021e-02, -2.9301002826804201e-02,
5.4482960891987418e-02, 6.9529163455605295e-02],
[-1.6836189776020186e-01, 2.3640652532180397e-03, -9.7830612961273787e-03,
-4.3384925283263803e-03, 2.6242325122575509e-02, 4.1998407366905972e-03,
-3.3722707982796368e-01, -3.0759729810328268e-04, 7.5941476768172879e-03,
3.8100521629094844e-02, 1.4572239933188913e-01, 1.9690878830170629e-02,
6.0829326494861213e-01, 4.9621075329404282e-02, -5.9247142206891999e-02,
1.3064735421816284e-01, 3.0914984373594800e-01, 2.7315097650831555e-01,
-4.0193545881325676e-01, 1.3389059803476010e-02],
[-1.8837762647302944e-01, -2.6506170070462966e-04, 9.5045746473391788e-03,
1.6070396075706143e-02, -1.2650329038731187e-02, 9.9545849827353903e-04,
1.3717891758524017e-01, 2.8761764841714775e-03, -2.0856570978317673e-03,
-3.3017745902311497e-02, 1.2768070145500918e-01, -3.6463696395518885e-02,
-7.3930717761521395e-02, -3.0537167337439303e-01, 4.1573411356085443e-02,
-1.5152981864030915e-01, 4.8759202573976861e-02, -6.7709118744009222e-01,
-5.9690153608602170e-01, -2.7378520900241853e-02],
[-2.4955084773360961e-01, 5.0132745530846767e-01, -6.3680274948111770e-01,
4.0914015063136369e-02, -2.9835072878894146e-02, 7.9877642690303077e-02,
1.3677177704054438e-01, -1.4732775012573343e-01, -5.8698489486516174e-02,
2.0382635259298496e-02, -3.3655379505976724e-01, 9.0193782976359826e-02,
2.4893598844936468e-01, -2.2179706502511429e-02, 7.1361936627633460e-02,
-8.0860202975180956e-02, -1.6563540733767848e-01, -1.4039797784181936e-02,
1.1272182521371323e-02, 1.9006224340654279e-01],
[-2.5495256615520118e-01, -3.3748338616973134e-01, 3.7608721374480897e-01,
1.4035912550905524e-02, 8.0053043927632003e-02, 6.9590451208896467e-04,
7.6559869357255006e-02, -5.6107287382533821e-01, -3.3786933215060100e-01,
-5.5315190708399700e-02, -4.0572560406151026e-01, 7.2491335180725147e-02,
2.4571882246255178e-01, -7.0652604727630297e-02, 1.6757168740552528e-01,
-7.3870087308034030e-02, -5.4661413431152328e-02, -2.6968347932136507e-02,
6.5275285711041159e-02, 1.6768722290153507e-01],
[-5.3998711549178569e-02, -1.7592409893681522e-04, 1.1522727993461665e-03,
1.3508049111185896e-03, 1.8658068196716099e-03, -3.6119469682524322e-03,
-1.9816018661724397e-01, 1.5754127719429692e-03, -1.4337364347162230e-03,
5.2504562027423318e-01, -3.1718402484926162e-04, 1.6069961066492808e-02,
-3.9714038321534278e-03, -3.9610818423901788e-02, -1.5909472323053279e-03,
-4.0681457205806217e-02, 3.3090569720357682e-03, -1.4964203591000780e-01,
1.5319513922088676e-01, 4.7492468678988738e-03],
[-1.3530771120634566e-01, -9.6172968531434977e-03, 7.0749506285411982e-03,
-9.0465788205601749e-03, -1.2191668275970086e-02, -5.8182765202937422e-04,
-2.1867554478796963e-01, 1.4414051566948381e-02, -2.6478538656496183e-02,
1.4655044092846956e-01, -1.7607127048579768e-01, -6.9801515194674486e-03,
-4.0328883152337441e-01, 4.0729662172207912e-04, 9.6983832679872248e-02,
4.7311266502472304e-02, -2.7410481334940706e-01, 3.4114785946073756e-01,
-4.3530770889792819e-01, -1.9190774612004441e-01],
[-2.8621450754806166e-01, 5.3902977040670574e-01, 3.7808801603630116e-01,
-1.2196513212598648e-01, 2.7756489635201431e-01, 3.4467976618746524e-02,
-2.2548923396304288e-01, 5.3112831397657399e-02, 3.8699042947816190e-02,
-3.5495149191181963e-01, -2.5044425178569096e-01, 6.3431643145071478e-02,
-1.9290437390721749e-01, 6.8702724144700217e-02, 2.4138257248188716e-02,
7.1858916232326264e-02, 3.9119398345061268e-01, -1.8880257115663285e-01,
2.4128646159674891e-01, -4.1918608103180666e-01]]:
Vinv := 1/V:

AllOnes := CreateArray (1..20, 1):


ScaleIndex := proc (ma: array (string), t: Tree)
  global ScaleIndex_MA, ScaleIndex_Tree, ScaleIndex_I;
  ls := length (ma[1]);
  res := CreateArray (1..ls);
  ScaleIndex_MA := ma;
  ScaleIndex_Tree := t;
  for ScaleIndex_I to ls do
    p := traperror (MaximizeFunc (ScaleIndex_F, -3..3, 0.002));
    if p = lasterror then
      res[ScaleIndex_I] := UNDEFINED
    else
      res[ScaleIndex_I] := p[1]
    fi
  od;
  res
end:

ScaleIndex_F := proc (logscale: numeric)
  cnt := CreateArray (1..1);
  x := ScaleIndex_R (10^logscale, ScaleIndex_Tree, 0, cnt);
  log10 (AF * x[1]) + x[2]
end:

ScaleIndex_R := proc (scale: numeric, t: Tree, h: numeric, 
		      cnt: array(integer,1))
  global AllOnes;
  if type (t, Leaf) then
    cnt[1] := cnt[1] + 1;
    p := AToInt (ScaleIndex_MA[cnt[1],ScaleIndex_I]);
    if p = 0 or p > 20 then
      AllOnes, 0
    else
      pam := scale * max (h - t[2], 0);
      diag := zip (exp (pam * lambda));
      V[p] * zip (diag * Vinv), 0
    fi
  else
    t1 := ScaleIndex_R (scale, t[1], t[2], cnt);
    t3 := ScaleIndex_R (scale, t[3], t[2], cnt);
    pam := scale * max (h - t[2]);
    diag := zip (exp (pam * lambda));
    res := zip (t1[1] * t3[1]) * V * zip (diag * Vinv);
    s := sum (res);
    res / s, t1[2] + t3[2] + log10 (s)
  fi
end:
end:

AllIndices := proc (ma: array (string), t: Tree)
  description 'compute and print the Kabat-Wu, Probabilistic and Scale indices';
  i1 := KWIndex (ma);
  i2 := ProbIndex (ma, t);
  i3 := ScaleIndex (ma, t);
  printf (' Pos ');
  for lbl in Infix(t) do printf ('%c', lbl[1]) od;
  printf ('      KW    Prob   DayMatrixScale\n');
  PrintIndex (ma, i1, i2, i3)
end:

PositionTree := proc (ma: array (string), t: Tree, pos: posint)
  description
  'Creates a tree containing the amino acids of position pos in ma as labels.';
  cnt := If (nargs = 3, CreateArray (1..1), args[4]);
  if type (t, Leaf) then
    cnt[1] := cnt[1] + 1;
    Leaf (ma[cnt[1],pos], t[2..-1])
  else
    Tree (PositionTree (ma, t[1], pos, cnt), t[2],
	  PositionTree (ma, t[3], pos, cnt), t[4..-1])
  fi
end:
