// Copyright 2014 Hamed S. Najafabadi /******************************************************************** This file is part of FASTAtoRF. FASTAtoRF is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. FASTAtoRF is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with FASTAtoRF. If not, see . ********************************************************************/ #include #include #include #include #include #include "declarations.h" #define VALIDATE_POS(x,l) ((x)residues = new char[ 30 ]; // allocate memory // copy the info zf ->seq_length = 29; zf ->zf_start = zf_start; zf ->helix_start = helix_start; zf ->last_pos = last_pos; // copy the first three positions memcpy( zf ->residues, gene_seq + zf_start, sizeof(char) * 3 ); // set the next 5 positions to - (gap) memset( zf ->residues + 3, '-', sizeof(char) * 5 ); // copy the helix region memcpy( zf ->residues + 8, gene_seq + helix_start, sizeof(char) * 14 ); // set the next 6 positions to - (gap) memset( zf ->residues + 22, '-', sizeof(char) * 6 ); // copy the last position zf ->residues[ 28 ] = gene_seq[ last_pos ]; zf ->residues[ 29 ] = 0; // terminate the string } ///////////////////////////////////////////////////////////////////////////////// void find_zfs( s_gene *genes[], int num_genes ) { //cout << "GENE" << char(9) << "ZF_INDEX" // << char(9) << "ZF_START" // << char(9) << "ZF_END" // << char(9) << "ZF_SEQ" << endl; // examine all genes int i; for( i = 0; i < num_genes; i ++ ) { genes[ i ] ->num_zfs = 0; int j; for( j = 0; j < genes[ i ] ->seq_length; j ++ ) { int zf_start = j; int helix_start; int last_pos; if( _is_c2h2( genes[ i ] ->seq, zf_start, genes[ i ] ->seq_length, &helix_start, &last_pos ) ) // this is a C2H2 { if( genes[ i ] ->num_zfs && // there are previously found ZFs zf_start <= genes[ i ] ->zfs[ genes[ i ] ->num_zfs - 1 ] ->last_pos ) // this zf overlaps with the previous one { cout << "WARNING: Overlapping ZFs are found in " << genes[ i ] ->name << ". Only the first ZF instance is considered." << endl; } else if( genes[ i ] ->num_zfs < MAX_ZF_PER_GENE ) // there is still room for more ZFs // add the found c2h2 to the list { int index = genes[ i ] ->num_zfs; genes[ i ] ->zfs[ index ] = new s_c2h2; // create the new ZF _copy_zf_sequence( genes[ i ] ->zfs[ index ], genes[ i ] ->seq, zf_start, helix_start, last_pos ); // copy the sequence, according to the canonical format genes[ i ] ->num_zfs ++; // update the number of ZFs //cout << genes[ i ] ->name << char(9) << index+1 // << char(9) << genes[ i ] ->zfs[ index ] ->zf_start+1 // << char(9) << genes[ i ] ->zfs[ index ] ->last_pos+1 // << char(9) << genes[ i ] ->zfs[ index ] ->residues << endl; } else // too many ZFs { cout << "WARNING: Too many ZFs for " << genes[ i ] ->name << ". " << "Some ZFs are ignored." << endl; break; // ignore the rest of the sequence } } } } } ///////////////////////////////////////////////////////////////////////////////// void randomize_zfs( s_gene *genes[], int num_genes ) { s_c2h2 *zfs[ MAX_ZFS ]; int num_zfs = 0; // create a list of all zfs int i, j; for( i = 0; i < num_genes; i ++ ) for( j = 0; j < genes[ i ] ->num_zfs; j ++ ) { zfs[ num_zfs ] = genes[ i ] ->zfs[ j ]; num_zfs ++; } // shuffle each position of the ZFs independently for( i = 0; i < _L; i ++ ) for( j = 0; j < num_zfs - 1; j ++ ) { int rnd_index = ( rand() % (num_zfs-j) ) + j; char swap = zfs[ j ] ->residues[ i ]; zfs[ j ] ->residues[ i ] = zfs[ rnd_index ] ->residues[ i ]; zfs[ rnd_index ] ->residues[ i ] = swap; } }