# Purpose: Convert a SwissProt flat file (optionally compressed) to Darwin format
# Author:  Lukas Knecht
# Created: 13 Sep 1993
#
SpToDarwin := proc(flatfile: string, darwinfile: string, descr: string,
                   compressed: boolean)
  description 'Converts a SwissProt flat file (flatfile) into a Darwin
  loadable file (darwinfile). The actual data is prefixed by descr
  which should contain the database name (DBNAME tag) and release
  (DBRELEASE tag).
  If compressed is specified and true, the flat file is read using zcat.';
  #
  # Parameters:
  #   flatfile   - name of the SwissProt flat file to read
  #   darwinfile - name of the output file (opened with OpenWriting)
  #   descr      - text written verbatim as the first output line
  #   compressed - optional; when given and true the input is read through
  #                a zcat pipe, otherwise flatfile is opened directly
  #
  # Output: after the descr line, one line per entry of the form
  #   <E><ID>..</ID> ... <SEQ>..</SEQ>[<SEC>..</SEC><RES>..</RES>]</E>
  # Fields appear in the order in which their line tags occur in the input.
  # Only the tags in TagsToKeep are copied; sequence lines (two leading
  # blanks) are emitted under the SEQ tag.
  #   FT : only ACT_SITE and DISULFID features are written; TURN, HELIX and
  #        STRAND features are folded into the SEC string instead.
  #   DR : only cross references whose first word is PDB; are written.
  #   SEC: one character per residue (t, h, s, ? where features overlap,
  #        blank otherwise); written only if a structure feature was seen.
  #   RES: smallest value parsed from X-RAY RP lines, -2 if only NMR was
  #        seen, 0 if neither.
  #
  TagsToKeep := ['ID','AC','DE','OS','OC','KW','FT','DR','SEQ']:
  LastTag := '':                     # tag of the field currently open, if any
  hasFT := hasDR := false:           # true once <FT> / <DR> has been opened
  Structure := CreateString(30000):  # per-residue secondary structure buffer
  SeqLen := StructLen := Resolution := 0:
  Set(printgc=false):
  OpenWriting(darwinfile);
  printf('%s\n', descr);
  if nargs > 3 and compressed then
    OpenPipe('zcat '.flatfile)
  else
    OpenReading(flatfile)
  fi;
  do
    t := ReadRawLine();
    if t = EOF then break fi;
    # A blank or one-character line has no two-character line tag. Give it
    # an empty tag so it is ignored instead of selecting past the end of t.
    if length(t) < 2 then
      tag := ''
    else
      tag := t[1..2]
    fi;
    if tag = '//' then
      # End of entry: close the open field, append the structure summary,
      # close the entry and reset all per-entry state.
      if LastTag <> '' then
        # FT and DR are opened lazily, so only close them if they were opened.
        if LastTag <> 'FT' and LastTag <> 'DR' or hasFT or hasDR then
          printf('</%s>', LastTag)
        fi;
        if StructLen > 0 then
          printf('<SEC>%s</SEC><RES>%g</RES>',
                 Structure[1..min(SeqLen,length(Structure))], Resolution);
          # Blank out only the part of the buffer that was written to.
          for i to StructLen do Structure[i] := ' ' od
        fi;
        printf('</E>\n');
        SeqLen := StructLen := Resolution := 0;
        LastTag := '';
        hasFT := hasDR := false
      fi
    elif tag = '  ' then
      tag := 'SEQ'
    elif tag = 'RP' then
      # RP lines are not copied; they are only scanned for the resolution.
      if CaseSearchString('X-RAY', t) >= 0 then
        par := CaseSearchString('(', t);
        if par >= 0 then
          # The number is expected right after the first opening parenthesis.
          # NOTE(review): relies on par being a 0-based offset and on
          # integer+string skipping that many leading characters - confirm.
          res := sscanf(par+1+t, '%g');
          # Keep the smallest resolution; any parsed value replaces 0 or the
          # NMR marker.
          if length(res) = 1 and (Resolution <= 0 or res[1] < Resolution) then
            Resolution := res[1]
          fi
        fi
      elif CaseSearchString('NMR', t) >= 0 and Resolution = 0 then
        Resolution := -2
      fi
    fi;
    if SearchArray(tag, TagsToKeep) > 0 then
      if tag <> LastTag then
        # Field change: open the entry or close the previous field, then
        # open the new one (FT and DR are opened later, on first use).
        if LastTag = '' then
          printf('<E>')
        elif LastTag <> 'FT' and LastTag <> 'DR' or hasFT or hasDR then
          printf('</%s>', LastTag)
        fi;
        if tag <> 'FT' and tag <> 'DR' then printf('<%s>', tag) fi;
        LastTag := tag;
        blank := 0;
        hasFT := hasDR := false
      fi;
      if tag = 'ID' then
        # Copy only the first blank-delimited word after the line tag.
        for p from 3 to length(t) while t[p] = ' ' do od;
        for p from p to length(t) while t[p] <> ' ' do printf('%c', t[p]) od
      elif tag = 'SEQ' then
        for p from 3 to length(t) do
          if t[p] > ' ' then
            SeqLen := SeqLen + 1;
            # Symbols that AToInt maps to 0 are written as X. Use an
            # explicit format so data is never interpreted as a format.
            printf('%s', If(AToInt(t[p]) = 0, 'X', t[p]))
          fi
        od
      elif tag = 'FT' then
        # Expected shape after the line tag: key, from, to.
        res := sscanf(2+t, '%s %d %d');
        if length(res) = 3 then
          ch := ' ';
          if res[1] = 'TURN' then ch := 't'
          elif res[1] = 'HELIX' then ch := 'h'
          elif res[1] = 'STRAND' then ch := 's'
          elif res[1] = 'ACT_SITE' or res[1] = 'DISULFID' then
            if hasFT then
              printf(' ')
            else
              printf('<FT>');
              hasFT := true
            fi;
            printf('%s %d %d', res[1], res[2], res[3])
          fi;
          # Structure is indexed 1..length(Structure), so a feature ending
          # exactly on the last position still fits (hence <=, not <).
          if ch > ' ' and res[2] > 0 and res[3] <= length(Structure) then
            for i from res[2] to res[3] do
              # Mark positions claimed by more than one feature with ?.
              Structure[i] := If(Structure[i] = ' ', ch, '?')
            od;
            StructLen := max(StructLen, res[3])
          fi
        fi
      elif tag = 'DR' then
        dr := sscanf(t[3..-1], '%s %s');
        if length(dr) = 2 and dr[1] = 'PDB;' then
          if hasDR then
            printf(' ')
          else
            printf('<DR>');
            hasDR := true
          fi;
          printf('%s %s', dr[1], dr[2])
        fi
      else
        # Generic text field: copy printable characters and collapse each
        # run of blanks / control characters into a single space.
        # blank: 0 = nothing written yet, 2 = last char was printable,
        #        1 = a separator is pending before the next printable char.
        for p from 3 to length(t) do
          if t[p] > ' ' then
            if blank = 1 then printf(' ') fi;
            blank := 2;
            printf('%c', t[p])
          elif blank = 2 then
            blank := 1
          fi
        od
      fi
    fi
  od;
  # Redirect output back to the terminal.
  OpenWriting(terminal)
end:

