# Purpose: DNA Tools server processing (ParExecuteIPC version) # Author: Gina Cannarozzi # Created: 14 May 2002 # # MainTitle := 'No title specified'; ParseMsg := proc (msg: string) global db, DB, SPDF,msg, MainTitle; # option trace; description 'Parses incoming message for DoEvolutionaryAnalysis and returns - a text with commands if there is a correct data base to be EvolutionaryAnalyzed - an error message preceded by a ? if there is an error we should reply to - a 0 if there is no valid message to reply to.'; db_path := '~darwin/DB/': if length (msg) < 10 or msg[1..5] <> 'From ' then lprint ('Invalid incoming message'); return (0) fi; first := CaseSearchString ('\n\n', msg); if first < 0 then lprint ('Start of message body not found'); return (0) fi; last := CaseSearchString ('\n\n', first + 2 + msg); if last < 0 then return ('?there is no message body.') fi; do skip := CaseSearchString ('\n\n', first + 4 + last + msg); if skip < 0 then break fi; last := last + skip + 2 od; body := msg[first+3..first+2+last]; token := sscanf (body, '%s'); token := If (length (token) = 0, '', token[1]); if length (token) < 20 or SearchString (token, 'TestNewFunction1') <> 0 and SearchString (token, 'EvolutionaryAnalysis') <> 0 then return (sprintf ('?"%s" is not a valid server command.', token)) fi; seqs := CaseSearchString (token, body) + length (token) + body; dot := SearchString('.', seqs); if dot > -1 then newdot := SearchString('.', seqs[dot+1..-1]); while newdot >-1 do dot := newdot + dot+1; newdot := SearchString('.', seqs[dot+1..-1]); od; commands := seqs[dot+1..-1]; seqs := uppercase(seqs[1..dot]); else return (sprintf('There are no commands')); fi; p := SearchString('title:', seqs); if p>-1 then p1 := SearchString('&', seqs[p+1..-1])+p+2; p2 := SearchString('&', seqs[p1+1..-1])+p1; if p2-p1>0 then MainTitle := seqs[p1..p2]; seqs:= seqs[1..p].seqs[p2+2..-1]; fi; fi; printf('Database for AllAll\n'); bad := copy([]); for seqno do printed := false; while length (seqs) > 0 and (isspace(seqs[1]) ) do seqs := 1+seqs od; i1 := SearchString(',',seqs); if i1<0 then i1 := 10^7 fi; i2 := SearchString('.',seqs); if i2<0 then i2 := 10^7 fi; i3 := SearchString(';',seqs); if i3<0 then i3 := 10^7 fi; it := min(i1,i2); it := min(it, i3); if it = 10^7 then return ('?"'.seqs[1..min (length (seqs), 6)]. '..." has no terminator ("," or "." or ";").') fi; # decide which is the format if seqs[1] = '<' then # entry in SGML format if length (seqs) < 3 or seqs[1..3] <> '' then return ('?SGML entry "'.seqs[1..it+1].'" does not start with .') fi; i3 := CaseSearchString ('', seqs); if i3 < 0 then return ('?SGML entry "'.seqs[1..it+1].'" does not end with .') fi; i4 := CaseSearchString ('', seqs); if i4 < 0 or i4 > i3 then return ('?SGML entry "'.seqs[1..it+1].'" has no tag.') fi; i5 := CaseSearchString ('', seqs); if i5 < i4 or i5 > i3 then return ('?SGML entry "'.seqs[1..it+1].'" has no tag.') fi; for i to i4 + 5 do printf ('%c', If(isspace(seqs[i]),'',seqs[i])) od; upper := lower := false; for i from i4 + 6 to i5 do if isspace(seqs[i]) then next fi; if CaseSearchString( seqs[i], 'ACDEFGHIKLMNPQRSTVWYX' ) >= 0 then printf('%c',seqs[i]); upper := true elif CaseSearchString( seqs[i], 'acdefghiklmnpqrstvwyx' ) >= 0 then printf('%c', IntToA( CaseSearchString( seqs[i],'arndcqeghilkmfpstwyvx')+1 )); lower := true else bad := append(bad, '?"'.seqs[i]. '" is an invalid character in the sequence "'. seqs[1..it].'".'); fi od; if upper and lower then bad := append(bad, '?of mixed upper and lower case to describe sequence "'. seqs[1..it].'".'); fi; for i from i5 + 1 to i3 + 4 do printf ('%c', If(isspace(seqs[i]),'',seqs[i])) od; printf ('\n'); it := i3 + 4 else under := CaseSearchString ('_', seqs); i1 := max(SearchString(';',seqs), SearchString(',',seqs)); ac := (under = -1 or under > i1) and it < 10 and it <= i1; if ac then it := i1; fi; if i1 < 0 or i1 > it then i1 := under; fi; if i1 < 0 then i1 := 10^7 fi; if (i1 < it) or ac or under > -1 then # must be an accession number or an ID arg := sscanf (seqs[1..it], '%s %d-%d'); if length (arg) <> 1 and length (arg) <> 3 then bad := append(bad, '?"'.seqs[1..it].'" is not a valid sequence identification.'); fi; if not type(SPDF,database) then OpenWriting (terminal); #machine := TimedCallSystem('machine', 10); SPDF := ReadDb('~darwin/DB/SwissProt.Z'); OpenAppending (db) fi; if ac then it := it + 1; fi; DB := SPDF; i2 := 0; if CaseSearchString ('_', Align) >0 and CaseSearchString ('__', Align)=-1 then ent := arg[1]; elif arg[1] > 'O' and arg[1] <= 'Z' and length(arg) < 10 then ent := SearchDb(arg[1]); else ent := SearchDb(arg[1]); fi; ent_len := length([ent]); if ent_len = 1 then # printf( 'Input sequence number %d', seqno ); printf('%s',ent); # printf('%s',ent['seq']); # printf( '\n' ); printed := true; elif ent_len >1 then bad := append(bad, sprintf('?"%s" was found %d times in the database. Please specify more clearly.', arg[1],ent_len )); elif ent_len <>1 then bad := append(bad, '?"'.arg[1].'" cannot be found in the database. Enter IDs or just the peptide sequences'); fi; if length (arg) = 3 then t := [GetEntryInfo (ent, 'AC', 'ID', 'DE', 'OS', 'SEQ')]; printf (''); for i by 2 to length (t) do if t[i] = 'DE' then printf ('Pos %d to %d of %s\n', arg[2], arg[3], t[i+1]) elif t[i] = 'SEQ' then if arg[2] > arg[3] or arg[2] < 1 or arg[3] > length (t[i+1]) then bad := append(bad, '?positions '.arg[2].' to '.arg[3]. ' cannot be extracted from '.arg[1]); else printf ('%s\n', t[i+1,arg[2]..arg[3]]); fi; else printf ('<%s>%s\n', t[i], t[i+1], t[i]) fi od; printf ('\n') else if bad = [] and printed = false then print( Entry(Offset(i2)) ); fi; fi else # must be an explicitly given amino acid sequence printf( 'Input sequence number %d', seqno ); upper := lower := false; for i to it do if isspace(seqs[i]) then next fi; if CaseSearchString( seqs[i], 'ACDEFGHIKLMNPQRSTVWYX' ) >= 0 then printf('%c',seqs[i]); upper := true elif CaseSearchString( seqs[i], 'acdefghiklmnpqrstvwyx' ) >= 0 then printf('%c', IntToA( CaseSearchString( seqs[i], 'arndcqeghilkmfpstwyvx')+1 )); lower := true else bad := append(bad, '?"'.seqs[i].'" is an invalid character in the sequence "'. seqs[1..it].'".'); fi od; if upper and lower then bad := append(bad, '?of mixed upper and lower case to describe sequence "'. seqs[1..it].'".'); fi; printf( '\n' ) fi fi; seqs := it+seqs; while length (seqs) > 0 and (isspace (seqs[1]) ) do seqs := 1 + seqs od; if length (seqs) <= 1 then break fi; if seqs[1] = '.' then seqs := 1 + seqs; break fi; if seqs[1] = ';' then seqs := 1 + seqs; break fi; if seqs[1] = ',' then seqs := 1 + seqs fi od; OpenWriting (terminal); if bad <> [] then return(bad); fi; err := traperror (ReadDb (db)); if err = lasterror then return ('?the database format is invalid ('.err. '); check the validity of sequences.') fi; DB := ReadDb(db); if DB[TotEntries] < 2 then return( '?there are less than two sequences, should have at least two entries.') fi; for i to DB[TotEntries] do if length(String(Sequence(Entry(i)))[1]) = 0 then return( '?sequence #'.i.' has no amino acids.') fi od; return (commands); end: AppendPostScript := proc (fromfile: string, tofile: string, title: string) global tmpfile; t := ReadRawFile (fromfile); p := CaseSearchString ('\n', t); OpenAppending (tofile); printf ('%s\n%% %s in PostScript\n%s', t[1..p], title, t[p+2..-1]); OpenAppending (tmpfile) end: DoEvolutionaryAnalysis := proc( commands: string ) global msg, db, prefix, n, printlevel, MainTitle, ct, nodes, ne, AA_Diff, CHANGES, SITES, SYNVAL: description 'process a server request for the evolutionary analysis'; dir := '/home/cbrg/WWWApache/share/public_html/Server/results/'; str := sprintf('%d',trunc(Rand()*100)); pid := getpid(); prefix2 := sprintf('%d',pid) . str . '.'; doneds := true; kaks := true; cov := true ; ne := DB[TotEntries]; tax_a := CreateArray(1..ne); tax_d := CreateArray(1..ne); label := CreateArray(1..ne); seqs := CreateArray(1..ne); dna := CreateArray(1..ne); for i to ne do tax_a[i] := SearchTag('ALI',Entry(i)); tax_d[i] := SearchTag('DALI',Entry(i)); label[i] := SearchTag('ID',Entry(i)); seqs[i] := SearchTag('SEQ',Entry(i)); dna[i] := SearchTag('DNA',Entry(i)); od; if tax_a[1] = '' then tax_a := 0 fi; if tax_d[1] = '' then tax_d := 0 fi; if seqs[1] = '' then seqs := 0 fi; if dna[1] = '' then dna := 0 fi; if tax_a = 0 and tax_d = 0 then msa := MultiAlign(seqs,label); tree := msa['Tree'] fi; a := CompleteInputData(label,seqs,dna,tax_a,tax_d); if tax_a = 0 then tax_a := a[4]; fi; if tax_d = 0 then tax_d := a[5]; fi; if seqs[1] = 0 then seqs := a[2]; fi; if dna[1] = 0 then dna := a[1]; fi; for i to DB[TotEntries] do CheckDNA(seqs[i],dna[i]): if not " then return('DNA doesnt match protein sequnce',i); fi; od: if tree = 0 then results := GetMATreeNew(copy(tax_a)): tree := copy(results[1]): Dist := copy(results[2]): Var := copy(results[3]): fi; utree := UnlabelTree3(tree): print(''); a:= traperror(GetKaKs(utree, tax_a,tax_d,label)): if a = lasterror then print(a): fi: OpenWriting(dir.prefix2.'neds'); neds:=ProcessMaliForNED(tax_d,label): ct := copy(ne): nodes := CreateArray(1..ne, 1..2): nodes[ne] := [x,x]: n := MakeNodes(utree): nednodes := MakeNodes2(n,neds[4],neds[16]): OpenAppending(dir.prefix2.'neds'); printf('\nnednodes:\n'): for i to length(nednodes) do printf('%d\t%a\t%a\t%5.3f\t%4d\n',i,nednodes[i,3],nednodes[i,4],nednodes[i,5,1],round (nednodes[i,5,1]*166.6667)) od: printf('\n'): OpenWriting(terminal); CallSystem('enscript -r -fCourier7 -p '.dir.prefix2.'neds.ps '.dir.prefix2.'neds'); OpenAppending(tmpfile); printf('Pick up your Ned Reports :\n'); printf('http://cbrg.inf.ethz.ch/Server/results/%sneds\n\n',prefix2); printf('http://cbrg.inf.ethz.ch/Server/results/%sneds.ps\n\n',prefix2); OpenWriting(terminal); end: init := proc () CreateDayMatrices(): Set(printgc = false): #not sure dartreealone or maalone are needed #or details or aa_ancestor ReadLibrary('Florida/dartreealone'): ReadLibrary('Florida/aachangereport'): ReadLibrary('Florida/structures_li'): ReadLibrary('Florida/maketrips2'): ReadLibrary('Florida/parsimony'): ReadLibrary('Florida/probabilities'): ReadLibrary('Florida/degeneracy2'): ReadLibrary('Florida/li_parameters'): ReadLibrary('Florida/ancients'): ReadLibrary('Florida/makebranch'): ReadLibrary('Florida/parsi_report'): ReadLibrary('Florida/tree_KaKs'): ReadLibrary('Florida/tree_mutations'): # ReadLibrary('Florida/details'): ReadLibrary('Florida/aa_ancestor'): ReadLibrary('Florida/maashow'): ReadLibrary('Florida/madshow'): ReadLibrary('Florida/ttn_tvn.work2'): ReadLibrary('Florida/codtocod'): ReadLibrary('Florida/glob_mod'): ReadLibrary('Florida/NED'): ReadLibrary('DNATools'); ReadLibrary('Florida/KaKs'): Li_Parsi_Globals(): end: job := proc () global msg, db, prefix, MainTitle; option trace; prefix := '/tmp/'.getpid ().'.'; dir := '/home/cbrg/WWWApache/share/public_html/Server/results/'; db := prefix.'db'; CallSystem ('rm '.db.'*'); OpenWriting (db); cmds := ParseMsg (msg): OpenWriting (terminal); res := 'error'; if type (cmds, string) or type(cmds, list) then OpenWriting (tmpfile); printf ('\nTitle of your job: %s\n\n', MainTitle); if type(cmds, string) then if (cmds[1] = '?') then lprint ('Sorry, your request could not be processed,'); printf ('because %s', cmds[2..-1]); lprint () else a := traperror(DoEvolutionaryAnalysis (cmds)): if a = lasterror then lprint (); lprint (); lprint ('Sorry, your request could not be processed'); lprint ('because an error was encountered.'); lprint ('Please send this error to darwin.comments@inf.ethz.ch.'); lprint ('Thank you, the cbrg team.'); lprint (); lprint (); printf ('%s', a); lprint (); fi: fi: else lprint ('Sorry, your request could not be processed,'); printf ('because the following errors occurred:\n'); for i to length(cmds) do lprint (cmds[i]); od: fi: OpenWriting (terminal); # OpenAppending ('~cbrg/EvolutionaryAnalysis/job.trace'); res := ReadRawFile (tmpfile); # OpenAppending ('~cbrg/EvolutionaryAnalysis/job.trace'); # lprint('After Running DoEvolutionaryAnalysis EvolutionaryAnalysis:', res); OpenWriting (terminal); fi; CallSystem ('rm '.prefix.'*'); res end: isspace := c -> SearchString(c, ' !\n') >= 0 : #isspace := c -> c <= ' ': isaa := c->SearchString( c, 'ACDEFGHIKLMNPQRSTVWYX' ) >= 0: isalpha := c->SearchString( c, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ.,;:<>\\/' ) >= 0: DbToArrays := proc(db) err := traperror (ReadDb (db)); ne := db['TotEntries']; seqs := CreateArray(ne): dna := CreateArray(ne): ali := CreateArray(ne): dali := CreateArray(ne): refs := CreateArray(ne): for i to ne do seqs[i] := SearchTag('SEQ',Entry(i)); dna[i] := SearchTag('DNA',Entry(i)); ali[i] := SearchTag('ALI',Entry(i)); dali[i] := SearchTag('DALI',Entry(i)); refs[i] := SearchTag('ID',Entry(i)); if refs[i] = '' then refs[i] := SearchTag('AC',Entry(i)); fi; od; [seqs,dna,ali,dali] end: CompleteInputData:= proc(label:{array(string),0}, seqs:{array(string),0}, dna:{array(string),0}, ali:{array(string),0},dali:{array(string),0}) alignment := isseqs := isdna := isdali := isali := false; label1 := copy(label); seqs1 := copy(seqs); dna1 := copy(dna); ali1 := copy(ali); dali1 := copy(dali); if not( seqs = 0 or length(seqs[1]) = 0) then isseqs := true; fi; if not( dna = 0 or length(dna[1]) = 0) then isdna := true; fi; if not( ali = 0 or length(ali[1]) = 0) then isali := true; fi; if not( dali = 0 or length(dali[1]) = 0) then isdali := true; fi; if not isseqs and not isali then error ('either protein sequences or protein alignment must be defined'); fi; if not isdna and not isdali then error ('either dna sequences or dna alignment must be defined'); fi; if isali or isdali then alignment := true; fi; if alignment then if not isali then ali1 := CreateProteinAlignFromDNA (seqs,dali1) fi; if not isdali then dali1 := CreateDNAAlignFromProtein (dna,ali1) fi; if not isseqs then seqs1 := CreateUngappedSequences(ali1); fi; if not isdna then dna1 := CreateUngappedSequences(dali1); fi; result := VerifyDNAMatchesProtein(seqs,dna); else msa := MultiAlign(seqs,label); msa1 := DoMultiAlign(msa,'PROB GAP'); ali1 := msa1['MA']; dali1 := CreateDNAAlignFromProtein (dna,ali1); fi; [label1,seqs1,dna1,ali1,dali1]; end: