#
# Entry( ... ) returns an entry
#
# Gaston H. Gonnet (Mar 1991)
# redone OO GhG (Jan 1992)
# redone functional GhG (Nov 2001)
#
# Entry( a ) - return the text of one or more database entries.
#
# The global DB must be assigned a database.  The argument is handled
# according to its kind:
#   - zero or several arguments: Entry is applied to each argument in turn
#     and the results are returned as an expression sequence;
#   - integer: an entry number, which must lie in 1 .. DB[TotEntries];
#   - list: the list is unpacked and handled like several arguments;
#   - string: returned unchanged when it passes the whole-entry test below,
#     otherwise it is looked up with GetOffset.
# Anything else raises 'is an incorrect argument for Entry'.
#
# NOTE(review): the empty string literals compared against the fixed-width
# slices z[-4..-1], a[1..3] and a[-4..-1], and the condition following
# "z>=0 and z" in the string branch, look as if angle-bracketed text was lost
# from this copy of the file (a 3- or 4-character slice can never equal '',
# and the last condition is not a valid expression as written).  Restore
# these lines from the authoritative source before relying on them.
Entry := proc( a ) option polymorphic;
if not type(DB,database) then error('DB must be assigned a database')
elif nargs <> 1 then
# map Entry over every argument and splice the results together
res := [];
for z in args do res := append(res,Entry(z)) od;
return(op(res))
elif type(a,integer) then
if a < 1 or a > DB[TotEntries] then
error(a,'is an entry number out of range') fi;
# text between the start offsets of entry a and entry a+1
z := DB[ string, DB['entry',a]+1 .. DB['entry',a+1] ];
# trim from the end until the last four characters match the terminator
while length(z) > 8 and z[-4..-1] <> '' do z := z-1 od;
return( z )
elif type(a,list) then return( Entry(op(a)) )
elif type(a,string) then
# a string that already looks like a complete entry is returned as is
if length(a)>10 and a[1..3]='' and a[-4..-1]='' then return(a) fi;
z := GetOffset(a);
if z>=0 and z '' then return( -3+z+4 ) fi
fi;
# reached for unsupported types and for strings that could not be resolved
error(a,'is an incorrect argument for Entry')
end:
#
# Entry_print - since an entry is a string, this has to be called
# specially from print.
#
# Print an entry in a nicer ascii format
#
# Entry_print( e ) - print selected fields of a database entry.
#
# e must satisfy type(e,Entry), otherwise 'is not a DB Entry' is raised.
# For each tag in the fixed list below, the field is fetched with SearchTag
# and, when it is non-empty, printed as "<tag> <contents>" on its own line.
Entry_print := proc( e:string ) option internal;
    if not type(e,Entry) then error(e,'is not a DB Entry') fi;
    tags := ['ID','AC','DE','OS','OC','KW','RES','SEQ'];
    for k to length(tags) do
        field := SearchTag(tags[k],e);
        # tags that are missing or empty are skipped silently
        if field <> '' then printf( '%-4s %s\n', tags[k], field ) fi
    od;
end:
# Entry_type( e ) - type test behind type(e,Entry).
#
# Returns true when e is a string longer than 7 characters that starts with
# the opening tag '<E>' and contains the closing tag '</E>' somewhere in its
# last seven characters (so a few trailing characters after the closing tag
# are tolerated).
#
# The tag literals must be spelled out: comparing the three-character slice
# e[1..3] with an empty string can never succeed, which would make every
# string fail this test.
Entry_type := proc( e ) option internal;
    type(e,string) and length(e) > 7 and e[1..3] = '<E>' and
        SearchString('</E>', e[-7..-1]) > -1
end:
#
# PatEntry( ... ) Placeholder for entries in the Pat index
# of the DB database
#
# Gaston H. Gonnet (Mar 1991)
#
# PatEntry( a, ... ) - build a normalized PatEntry(...) structure.
#
# The global DB must be assigned a database.
#
# One argument:
#   posint        checked against DB[TotAA]; returns PatEntry(a)
#   lo..hi        returns PatEntry(lo..hi) after a bounds check; the range
#                 with lo = hi+1 is treated as empty and gives PatEntry()
#   string        delegated to SearchSeqDb
#   list          unpacked and handled like several arguments
#   otherwise     raises 'is an incorrect argument'
#
# Zero or several arguments: each argument is normalized separately, the
# pieces are sorted by starting position, and touching or overlapping
# neighbours are merged, so the result holds disjoint integers and ranges in
# increasing order.
PatEntry := proc( a ) option polymorphic;
    if not type(DB,database) then error('DB must be assigned a database') fi;

    if nargs <> 1 then
        # normalize every argument on its own and splice in its operands
        res := copy([args]);
        for i to nargs do res[i] := op(PatEntry(res[i])) od;
        # attempt to compact PatEntry
        res := eval(res);
        if length(res) <= 1 then return( noeval(PatEntry(op(res))) ) fi;
        # order by starting position: a range sorts by its lower end
        res := sort( res, x -> If(type(x,range),x[1],x) );
        # res[1..j] is the merged prefix; res[i] is the next candidate
        j := 1;
        for i from 2 to length(res) do
            aa := res[j];  bb := res[i];
            if type(aa,integer) then
                if type(bb,integer) then
                    # both entries are single entries
                    # (an equal value is a duplicate and is dropped)
                    if aa+1 = bb then res[j] := aa..bb
                    elif bb > aa then j := j+1;  res[j] := bb fi
                else
                    # integer followed by range
                    if aa+1 >= bb[1] then res[j] := aa..bb[2]
                    else j := j+1;  res[j] := bb fi
                fi
            else
                if type(bb,integer) then
                    # range followed by integer
                    if aa[2] >= bb-1 then res[j] := aa[1] .. max(aa[2],bb)
                    else j := j+1;  res[j] := bb fi
                else
                    # range followed by range
                    if aa[2]+1 >= bb[1] then
                        res[j] := aa[1] .. max(aa[2],bb[2])
                    else j := j+1;  res[j] := bb fi
                fi
            fi
        od;
        res := res[1..j];
        return( noeval(PatEntry(op(res))) )
    fi;

    if type(a,posint) then
        if a > DB[TotAA] then error('entry out of range') fi;
        return( noeval(PatEntry(a)) )
    elif type(a,posint..integer) then
        if a[1] <> a[2]+1 then
            if a[1] > a[2] or a[2] > DB[TotAA] then
                error('range entry out of bounds') fi;
            return( noeval(PatEntry(a)) )
        else
            # lo = hi+1 denotes the empty range
            return( noeval(PatEntry()) )
        fi
    elif type(a,string) then return( SearchSeqDb(a) )
    elif type(a,list) then return( PatEntry(op(a)) )
    else error(a,'is an incorrect argument') fi
end:
# PatEntry_Entry( e ) - convert a PatEntry into the entries it points into.
#
# Every operand of e is either a single Pat index position or a range of
# positions.  Each position k is mapped through
# GetEntryNumber(DB[Pat,k]) and the collected entry numbers are handed to
# Entry.  Any other kind of operand raises 'invalid PatEntry entry'.
PatEntry_Entry := proc(e) option internal;
    nums := [];
    for item in e do
        # treat a single position as the one-element range item..item
        if type(item,posint) then
            lo := item;  hi := item
        elif type(item,posint..posint) then
            lo := item[1];  hi := item[2]
        else error(e,item,'invalid PatEntry entry') fi;
        for k from lo to hi do
            nums := append(nums,GetEntryNumber(DB[Pat,k])) od
    od;
    Entry(op(nums))
end:
# PatEntry_print( ... ) - print a PatEntry: one lprint line per sequence
# returned by PatEntry_Sequence for the same arguments.
PatEntry_print := proc() option internal;
    seqs := [PatEntry_Sequence(args)];
    for k to length(seqs) do lprint(seqs[k]) od
end:
# PatEntry_Sequence( e ) - the sequences a PatEntry refers to.
#
# Every operand of e (a position or a range of positions in the Pat index)
# is expanded to DB[Pat,k]+DB[string] for each position k.  Each resulting
# text is then cut at its first '<' (the '<' is kept, as the slice ends at
# the position SearchString reports); when SearchString reports a position
# <= 0 the text is passed through Sequence instead.  The results are
# returned as an expression sequence.
# Any other kind of operand raises 'invalid PatEntry entry'.
PatEntry_Sequence := proc(e) option internal;
    texts := [];
    for item in e do
        # treat a single position as the one-element range item..item
        if type(item,posint) then
            lo := item;  hi := item
        elif type(item,posint..posint) then
            lo := item[1];  hi := item[2]
        else error(e,item,'invalid PatEntry entry') fi;
        for k from lo to hi do
            texts := append(texts,DB[Pat,k]+DB[string]) od
    od;
    # trimming is done in a second pass, after all operands were validated
    for k to length(texts) do
        cut := SearchString('<',texts[k]);
        if cut > 0 then texts[k] := texts[k,1..cut]
        else texts[k] := Sequence(texts[k]) fi
    od;
    op(texts)
end:
# Converting a PatEntry to a string reuses the definition of
# PatEntry_Sequence.
# NOTE(review): presumably op() applied to a procedure yields its definition,
# making this an independent copy rather than an alias - confirm.
PatEntry_string := op(PatEntry_Sequence):
# PatEntry_Match( ... ) - build a Match from a PatEntry.
#
# The arguments are expanded with PatEntry_Sequence; exactly two sequences
# must result, and these become the two arguments of Match.  Any other
# count raises 'two offsets/entries are needed for a Match'.
PatEntry_Match := proc() option internal;
    pair := [PatEntry_Sequence(args)];
    if length(pair) = 2 then return( Match(pair[1],pair[2]) ) fi;
    error('two offsets/entries are needed for a Match')
end:
# Type definition used by type(x,PatEntry).
# NOTE(review): presumably this accepts a PatEntry(...) structure whose
# operands are each a positive integer or a range of positive integers -
# confirm against the semantics of structure().
PatEntry_type := structure({posint,posint..posint},noeval(PatEntry)):
#################################################
# #
# string( r ) #
# #
# Convert its argument(s) to a single string #
# #
#################################################
# string( a, ... ) - convert the argument(s) to a single string.
#
#   no argument        the empty string
#   several arguments  each one is converted separately and the pieces are
#                      concatenated in order
#   integer            formatted with '%.0f'
#   other numeric      formatted with NumberFormat when that global is
#                      assigned a string, otherwise with '%.8g'
#   string             returned unchanged
#   anything else      formatted with '%a'
string := proc( a ) option polymorphic;
    description 'Converts argument to a string. Multiple arguments are concatenated';
    if nargs = 0 then return( '' ) fi;
    if nargs > 1 then
        acc := '';
        for k to nargs do acc := acc . (string(args[k])) od;
        return( acc )
    fi;
    # exactly one argument from here on
    if type(a,integer) then return( sprintf( '%.0f', a ) ) fi;
    if type(a,numeric) then
        if assigned(NumberFormat) and type(NumberFormat,string) then
            return( sprintf( NumberFormat, a ) ) fi;
        return( sprintf( '%.8g', a ) )
    fi;
    if type(a,string) then return( a ) fi;
    sprintf( '%a', a )
end:
# string_Sequence( z ) - extract the sequence from a string.
#
#   - A string longer than 10 characters that starts with '<' is treated as
#     tagged text: the contents of its SEQ field are returned, and an empty
#     or missing field is an error.
#   - Otherwise z is returned when it consists only of symbols that AToInt
#     maps to a value >= 1, or only of the letters A, C, G, U, X.
#   - In either of those two scans, a valid prefix directly followed by the
#     closing tag '</SEQ>' yields just that prefix.
#   - Anything else raises 'is not a valid sequence (neither AA nor RNA)'.
#
# The closing tag has to be written out: z[i..i+5] is a six-character slice
# and can never equal an empty string.
string_Sequence := proc(z) option internal;
    if length(z) > 10 and z[1] = '<' then
        t := SearchTag('SEQ',z);
        if t='' then error(z,'cannot locate a <SEQ>..</SEQ> field') fi;
        return(t)
    fi;

    # Allow 'X' as a valid AA, DNA or RNA
    # (i ends up at the first invalid symbol, or at length(z)+1)
    for i to length(z) do if AToInt(z[i]) < 1 then break fi od;
    if i > length(z) then return(z)
    elif length(z)-i >= 5 and z[i..i+5]='</SEQ>' then return(z[1..i-1]) fi;

    # purely RNA (if it is DNA, it comes out above)
    for i to length(z) do
        if not member(z[i],{'A','C','G','U','X'}) then break fi od;
    if i > length(z) then return(z)
    elif length(z)-i >= 5 and z[i..i+5]='</SEQ>' then return(z[1..i-1]) fi;

    error(z,'is not a valid sequence (neither AA nor RNA)')
end:
#################################################################
# Sequence: peptide or nucleotide sequences #
# (just the amino acids or the bases, excluding the rest) #
# #
# Gaston H. Gonnet (Dec 1991) #
# redone Gaston H. Gonnet (Nov 2001) #
# redone Gaston H. Gonnet (Jan 2003) #
#
# fixed so that it doesn't break on the base U or unknown base X
# Markus Friberg (May 2003)
# #
#################################################################
# Sequence( a, ... ) - the bare sequence (amino acids or bases) behind a.
#
#   zero or several arguments  Sequence of each argument, as an expression
#                              sequence
#   list                       Sequence of each element
#   string                     a string longer than 10 characters starting
#                              with '<' yields its SEQ field; any other
#                              string is validated and returned unchanged
#   posint                     taken to be an offset into the database text
#                              (DB must be assigned); returns the run of
#                              valid symbols starting there
#   anything else              Sequence(Entry(a))
#
# A symbol is valid when AToInt maps it into 1..20 or when it is 'U' or 'X'.
# An invalid symbol in a plain string raises 'is not a valid sequence'.
Sequence := proc( a ) option polymorphic;
    if nargs <> 1 then seq( procname(args[i]), i=1..nargs )
    elif type(a,list) then seq( procname(a[i]), i=1..length(a) )
    elif type(a,string) then
        if length(a) > 10 and a[1] = '<' then SearchTag('SEQ',a)
        else
            for i to length(a) do
                # the U/X exemption must look at a[i], the symbol being
                # validated; t is only assigned in the offset branch below
                if (AToInt(a[i]) < 1 or AToInt(a[i]) > 20) and
                   not member(a[i], ['U', 'X']) then
                    error(a,a[i],'is not a valid sequence') fi od;
            a
        fi
    elif type(a,posint) then	# assume it is an offset
        if not type(DB,database) then
            error('DB must be assigned a database') fi;
        t := a+DB[string];
        # advance i to the first symbol that is not part of the sequence
        for i while (AToInt(t[i]) >= 1 and AToInt(t[i]) <= 20) or
                    member(t[i], ['U', 'X']) do od;
        t[1..i-1]
    else Sequence(Entry(a))
    fi
end:
# Match_Entry( m ) - the entries containing the two sides of a Match.
# The entry numbers for m[Offset1] and m[Offset2] are obtained with
# GetEntryNumber and passed together to Entry; the result is wrapped in a
# list.
Match_Entry := proc( m ) option internal;
    first := GetEntryNumber(m[Offset1]);
    second := GetEntryNumber(m[Offset2]);
    [Entry( first, second )]
end:
# Match_Sequence( m ) - the two sequences covered by a Match.
#
# Both sides are read from the database text at m[Offset1] and m[Offset2].
# When both recorded lengths are zero, each side extends up to (but not
# including) the first symbol that AToInt maps outside 1..20; otherwise the
# recorded lengths m[Length1] and m[Length2] are used.
# Returns the two strings as an expression sequence.
Match_Sequence := proc( m ) option internal;
    s1 := m[Offset1]+DB[string];
    s2 := m[Offset2]+DB[string];

    # explicit lengths available: just cut
    if m[Length1] <> 0 or m[Length2] <> 0 then
        return( s1[1..m[Length1]], s2[1..m[Length2]] ) fi;

    # no lengths recorded: scan each side for the first invalid symbol
    for i1 to length(s1)
        while AToInt(s1[i1]) >= 1 and AToInt(s1[i1]) <= 20 do od;
    for i2 to length(s2)
        while AToInt(s2[i2]) >= 1 and AToInt(s2[i2]) <= 20 do od;
    s1[1..i1-1], s2[1..i2-1]
end:
# Purpose: ID: data structure for database ID
# Author: Lukas Knecht -- new version GhG
# Created: 8 Mar 1995 -- 24 Nov 2001
#
# ID( id, ... ) - build an ID(...) structure of database identifiers.
#
# The global DB must be assigned a database.
#   zero or several arguments  the arguments are collected into a set, ID is
#                              applied to each member and the operands of
#                              the results are combined into one ID(...)
#   list                       unpacked and handled like several arguments
#   string of length <= 42     taken to be an identifier: ID(id)
#   longer string              if it starts with '<' and has a non-empty ID
#                              field, that field is used; otherwise the
#                              string is located with GetOffset and the ID
#                              field of the entry containing it is used
# A long string that cannot be resolved raises 'is not a valid ID'.
ID := proc(id:{string,list}) option polymorphic;
    if not type(DB,database) then error('DB must be assigned a database') fi;

    if nargs <> 1 then
        parts := [];
        for x in {args} do parts := append(parts,op(ID(x))) od;
        return( noeval(ID(op(parts))) )
    fi;

    if type(id,list) then return( ID(op(id)) ) fi;

    #id longer than 42 is assumed to be a sequence
    if length(id) <= 42 then return( noeval(ID(id)) ) fi;

    if id[1]='<' then
        tagged := SearchTag('ID',id);
        if tagged <> '' then return( noeval(ID(tagged)) ) fi
    fi;

    off := GetOffset(id);
    if off >= 0 and off < DB[TotChars] then
        return( noeval( ID( SearchTag('ID',Entry(GetEntryNumber(off))))) ) fi;
    error(id,'is not a valid ID')
end:
# Match_ID( m ) - ID structure for the entries on both sides of a Match.
Match_ID := proc(m) option internal;
    return( ID(Match_Entry(m)) )
end:
# PatEntry_ID( ... ) - ID structure for the entries a PatEntry points into.
PatEntry_ID := proc() option internal;
    return( ID(PatEntry_Entry(args)) )
end:
# ID_Entry( id ) - look up every identifier held in id with SearchID and
# return the results as an expression sequence.
ID_Entry := proc(id) option internal;
    found := [];
    for ident in id do
        found := append(found,SearchID(ident))
    od;
    return( op(found) )
end:
# ID_Sequence( ... ) - sequences of the entries found for an ID structure.
ID_Sequence := proc() option internal;
    return( Sequence(ID_Entry(args)) )
end:
# Purpose: AC: data structure for database AC Accession number
# Author: Lukas Knecht -- new version GhG
# Created: 8 Mar 1995 -- 24 Nov 2001
#
# AC( ac, ... ) - build an AC(...) structure of accession numbers.
#
# The global DB must be assigned a database.
#   zero or several arguments  the arguments are collected into a set, AC is
#                              applied to each member and the operands of
#                              the results are combined into one AC(...)
#   list                       unpacked and handled like several arguments
#   string of length <= 15     taken to be an accession number: AC(ac)
#   longer string              - a complete entry ('<E>' ... '</E>') with a
#                                non-empty AC field contributes that field;
#                              - a string containing a symbol that AToInt
#                                does not map to a positive value is kept as
#                                an accession number;
#                              - otherwise it is treated as a sequence,
#                                located with GetOffset, and the AC field of
#                                the entry containing it is used
# A sequence that cannot be located raises 'is not a valid AC'.
#
# The entry tags have to be written out: ac[1..3] and ac[-4..-1] are three-
# and four-character slices and can never equal an empty string.
AC := proc(ac:{string,list}) option polymorphic;
    if not type(DB,database) then error('DB must be assigned a database')
    elif nargs <> 1 then
        r := [];
        for z in {args} do r := append(r,op(AC(z))) od;
        return( noeval(AC(op(r))) )
    elif type(ac,list) then return( AC(op(ac)) )
    elif length(ac) > 15 then
        # check for an entire entry
        if ac[1..3]='<E>' and ac[-4..-1]='</E>' then
            z := SearchTag('AC',ac);
            if z <> '' then return( noeval(AC(z)) ) fi
        fi;
        # longer than 15, if all valid aa's it is assumed to be a sequence
        for i to length(ac) while AToInt(ac[i]) > 0 do od;
        # the scan stopped early, so ac is not a sequence
        if i <= length(ac) then return( noeval(AC(ac)) ) fi;
        z := GetOffset(ac);
        if z >= 0 and z < DB[TotChars] then
            return( noeval( AC( SearchTag('AC',Entry(GetEntryNumber(z))))) ) fi;
        error(ac,'is not a valid AC')
    else noeval(AC(ac)) fi
end:
# Match_AC( m ) - AC structure for the entries on both sides of a Match.
Match_AC := proc(m) option internal;
    return( AC(Match_Entry(m)) )
end:
# PatEntry_AC( ... ) - AC structure for the entries a PatEntry points into.
PatEntry_AC := proc() option internal;
    return( AC(PatEntry_Entry(args)) )
end:
# AC_Entry( ac ) - look up every accession number held in ac with SearchAC
# and return the results as an expression sequence.
AC_Entry := proc(ac) option internal;
    found := [];
    for acc in ac do
        found := append(found,SearchAC(acc))
    od;
    return( op(found) )
end:
# AC_Sequence( ... ) - sequences of the entries found for an AC structure.
AC_Sequence := proc() option internal;
    return( Sequence(AC_Entry(args)) )
end:
#############
# Iterators #
#############
# Entries is bound to its own unevaluated name.
# NOTE(review): presumably this keeps Entries(...) an inert structure that
# is only interpreted by Entries_iterator - confirm.
Entries := noeval(Entries):
# Entries_iterator( a0 ) - iterate over database entries.
#
# a0 is an Entries(...) structure:
#   Entries()            every entry of the current DB
#   Entries(db)          every entry of the database db; DB is switched to
#                        db only while an entry is fetched and switched back
#                        before the entry is handed to iterate
#   Entries([i1,i2,..])  the entries with these numbers in the current DB;
#                        each number must lie in 1 .. DB[TotEntries]
#   Entries([x1,x2,..])  Entry(x) for every element of the list
# Any other argument raises 'is an invalid argument to iterate on Entries'.
#
# NOTE(review): for more than one operand the structure is re-wrapped as
# Entries([args]); confirm that this gives the intended list of operands.
# NOTE(review): DB is set to a before the database loop and only restored
# inside it, so it looks like DB stays switched when a has no entries -
# confirm whether that matters.
Entries_iterator := proc( a0 )
    global DB;
    option internal;
    a := op(args[1]);
    if length(a0)=0 then
        if not type(DB,database) then
            error('DB must be assigned a database') fi;
        Entries_iterator( Entries(DB) )
    elif length(a0) > 1 then Entries_iterator( Entries([args]) )
    elif type(a,database) then
        oldDB := DB;  DB := a;
        for i to a[TotEntries] do
            DB := a;  t := Entry(i);  DB := oldDB;
            iterate(t)
        od
    elif type(a,list(integer)) then
        if not type(DB,database) then
            error('DB must be assigned a database') fi;
        # walk the list that was passed in (a); no other local holds it
        for i in a do
            if i < 1 or i > DB[TotEntries] then
                error(i,'is an invalid range for an entry in',DB ) fi;
            iterate(Entry(i))
        od
    elif type(a,list) then
        if not type(DB,database) then
            error('DB must be assigned a database') fi;
        for z in a do iterate(Entry(z)) od
    else error(a,'is an invalid argument to iterate on Entries') fi;
end:
# Sequences is bound to its own unevaluated name.
# NOTE(review): presumably this keeps Sequences(...) an inert structure that
# is only interpreted by Sequences_iterator - confirm.
Sequences := noeval(Sequences):
# Sequences_iterator( a0 ) - iterate over the sequences of database entries.
#
# a0 is a Sequences(...) structure:
#   Sequences()            the sequence of every entry of the current DB
#   Sequences(db)          the sequence of every entry of the database db;
#                          DB is switched to db only while a sequence is
#                          fetched and switched back before it is handed to
#                          iterate
#   Sequences([i1,i2,..])  the sequences of the entries with these numbers
#                          in the current DB; each number must lie in
#                          1 .. DB[TotEntries]
#   Sequences([x1,x2,..])  Sequence(x) for every element of the list
# Any other argument raises
# 'is an invalid argument to iterate on Sequences'.
#
# NOTE(review): for more than one operand the structure is re-wrapped as
# Sequences([args]); confirm that this gives the intended list of operands.
Sequences_iterator := proc( a0 )
    global DB;  option internal;
    a := op(args[1]);
    if length(a0)=0 then
        if not type(DB,database) then
            error('DB must be assigned a database') fi;
        Sequences_iterator( Sequences(DB) )
    elif length(a0) > 1 then Sequences_iterator( Sequences([args]) )
    elif type(a,database) then
        oldDB := DB;  DB := a;
        for i to a[TotEntries] do
            DB := a;  t := Sequence(Entry(i));  DB := oldDB;
            iterate(t)
        od
    elif type(a,list(integer)) then
        if not type(DB,database) then
            error('DB must be assigned a database') fi;
        # walk the list that was passed in (a); no other local holds it
        for i in a do
            if i < 1 or i > DB[TotEntries] then
                error(i,'is an invalid range for an entry in',DB ) fi;
            iterate(Sequence(Entry(i)))
        od
    elif type(a,list) then
        if not type(DB,database) then
            error('DB must be assigned a database') fi;
        for z in a do iterate(Sequence(z)) od
    else error(a,'is an invalid argument to iterate on Sequences') fi;
end: