//--------------------------------------------------------------------------------
// File: CuffDiffFile.cs
// Author: Timothy O'Connor
// © Copyright University of Queensland, 2012-2014. All rights reserved.
// License:
//--------------------------------------------------------------------------------

namespace Genomics
{
    using System;
    using System.Collections.Generic;
    using System.Globalization;
    using System.IO;
    using System.Linq;
    using System.Text.RegularExpressions;
    using Shared;

    /// <summary>
    /// Class for encapsulating data coming from a cuffdiff run.
    /// One instance corresponds to one data row of the cuffdiff output file.
    /// </summary>
    public class CuffDiffData
    {
        /// <summary>
        /// Gets or sets the value of the "test_id" column (Ensembl version suffix removed).
        /// </summary>
        public string TestId { get; set; }

        /// <summary>
        /// Gets or sets the value of the "gene_id" column (Ensembl version suffix removed).
        /// </summary>
        public string GeneId { get; set; }

        /// <summary>
        /// Gets or sets the value of the "gene" column.
        /// </summary>
        public string Gene { get; set; }

        /// <summary>
        /// Gets or sets the location parsed from the "locus" column.
        /// </summary>
        public Genomics.Location Locus { get; set; }

        /// <summary>
        /// Gets or sets the value of the "status" column.
        /// </summary>
        public string Status { get; set; }

        /// <summary>
        /// Gets or sets the value of the "value_1" column.
        /// </summary>
        public double Fpkm1 { get; set; }

        /// <summary>
        /// Gets or sets the value of the "value_2" column.
        /// </summary>
        public double Fpkm2 { get; set; }

        /// <summary>
        /// Gets or sets the value of the "log2(fold_change)" column.
        /// </summary>
        public double FoldChange { get; set; }

        /// <summary>
        /// Gets or sets the value of the "p_value" column.
        /// </summary>
        public double PValue { get; set; }

        /// <summary>
        /// Gets or sets the value of the "q_value" column.
        /// </summary>
        public double QValue { get; set; }

        /// <summary>
        /// Gets or sets a value indicating whether the "significant" column was exactly "yes".
        /// </summary>
        public bool Significant { get; set; }
    }

    /// <summary>
    /// Object representation of a cuff diff output file
    /// </summary>
    public class CuffDiffFile
    {
        /// <summary>
        /// Matches identifiers that start with "ENST" or "ENSG".
        /// Built once instead of once per parsed line.
        /// </summary>
        private static readonly Regex EnsemblIdFormat = new Regex("^ENS[TG]");

        /// <summary>
        /// The filename.
        /// </summary>
        private readonly string filename;

        /// <summary>
        /// The transcript data.
        /// </summary>
        private Dictionary<string, CuffDiffData> transcriptData;

        /// <summary>
        /// Significantly changed genes
        /// </summary>
        private List<string> significantGenes;

        /// <summary>
        /// Initializes a new instance of the <see cref="CuffDiffFile"/> class.
        /// The file is not opened here; it is read when the data is first requested.
        /// </summary>
        /// <param name="filename">Path of the tab-separated cuffdiff output file.</param>
        public CuffDiffFile(string filename)
        {
            this.filename = filename;
        }

        /// <summary>
        /// Gets the "gene" values of all entries in <see cref="TranscriptData"/> that are flagged significant.
        /// </summary>
        /// <remarks>
        /// NOTE(review): Helpers.CheckInit is defined in Shared and not visible here; it is
        /// presumably a lazy initializer that caches the result in the backing field - confirm.
        /// </remarks>
        public List<string> SignificantlyChangedGenes
        {
            get
            {
                return Helpers.CheckInit(
                    ref this.significantGenes,
                    () => this.TranscriptData.Values
                        .Where(x => x.Significant)
                        .Select(x => x.Gene)
                        .ToList());
            }
        }

        /// <summary>
        /// Gets the transcript data from the differential expression experiment
        /// </summary>
        /// <value>
        /// The transcript data, keyed by gene id. When several rows share a gene id, the row
        /// with the largest absolute fold change value is kept.
        /// </value>
        /// <exception cref="InvalidDataException">
        /// The file is empty, a row has a different number of fields than the header,
        /// or a locus value is malformed.
        /// </exception>
        public Dictionary<string, CuffDiffData> TranscriptData
        {
            get
            {
                return Helpers.CheckInit(
                    ref this.transcriptData,
                    () => this.LoadTranscriptData());
            }
        }

        /// <summary>
        /// Reads the whole file and builds the gene id to record map.
        /// </summary>
        /// <returns>One record per distinct gene id.</returns>
        private Dictionary<string, CuffDiffData> LoadTranscriptData()
        {
            var records = new List<CuffDiffData>();

            using (TextReader reader = new StreamReader(this.filename))
            {
                var headerLine = reader.ReadLine();
                if (headerLine == null)
                {
                    throw new InvalidDataException(
                        string.Format("Cuffdiff file '{0}' is empty; expected a header line", this.filename));
                }

                // Map each column name to its position so fields can be looked up by name.
                var headers = headerLine.Trim().Split('\t')
                    .Select((x, i) => new { Field = x, Index = i })
                    .ToDictionary(x => x.Field, x => x.Index);

                // ReadLine strips "\n" as well as "\r\n", so files with Windows line endings
                // do not leave a stray '\r' on the last column of every row.
                string line;
                while ((line = reader.ReadLine()) != null)
                {
                    if (string.IsNullOrWhiteSpace(line))
                    {
                        continue;
                    }

                    records.Add(ParseRecord(line, headers));
                }
            }

            // OrderBy is a stable sort, so among rows with an equal absolute fold change
            // the one appearing last in the file is chosen.
            return records
                .ToLookup(x => x.GeneId, x => x)
                .ToDictionary(x => x.Key, x => x.OrderBy(y => Math.Abs(y.FoldChange)).Last());
        }

        /// <summary>
        /// Converts one tab-separated data line into a <see cref="CuffDiffData"/> record.
        /// </summary>
        /// <param name="line">The data line, without its line terminator.</param>
        /// <param name="headers">Column name to column index map built from the header line.</param>
        /// <returns>The parsed record.</returns>
        private static CuffDiffData ParseRecord(string line, Dictionary<string, int> headers)
        {
            var fields = line.Split('\t');
            if (fields.Length != headers.Count)
            {
                throw new InvalidDataException(
                    string.Format("Invalid number of fields ({0}) in line:\n\t{1}", fields.Length, line));
            }

            var geneId = RemoveEnsemblSuffix(fields[headers["gene_id"]]);

            return new CuffDiffData
            {
                TestId = RemoveEnsemblSuffix(fields[headers["test_id"]]),
                GeneId = geneId,
                Gene = fields[headers["gene"]],
                Locus = ParseLocus(fields[headers["locus"]], geneId),
                Status = fields[headers["status"]],
                Fpkm1 = ParseDouble(fields[headers["value_1"]]),
                Fpkm2 = ParseDouble(fields[headers["value_2"]]),
                FoldChange = ParseDouble(fields[headers["log2(fold_change)"]]),
                PValue = ParseDouble(fields[headers["p_value"]]),
                QValue = ParseDouble(fields[headers["q_value"]]),
                Significant = fields[headers["significant"]] == "yes"
            };
        }

        /// <summary>
        /// Parses a locus of the form "chromosome:start-end".
        /// </summary>
        /// <param name="locus">The raw locus text.</param>
        /// <param name="name">The name to assign to the resulting location.</param>
        /// <returns>The parsed location.</returns>
        /// <exception cref="InvalidDataException">The text does not have the expected form.</exception>
        private static Genomics.Location ParseLocus(string locus, string name)
        {
            // Anchor on the LAST ':' so reference names that themselves contain ':' or '-'
            // are kept intact instead of being split apart.
            int colon = locus.LastIndexOf(':');
            int dash = colon < 0 ? -1 : locus.IndexOf('-', colon + 1);
            int start;
            int end;

            if (colon <= 0
                || dash < 0
                || !int.TryParse(
                    locus.Substring(colon + 1, dash - colon - 1),
                    NumberStyles.Integer,
                    CultureInfo.InvariantCulture,
                    out start)
                || !int.TryParse(
                    locus.Substring(dash + 1),
                    NumberStyles.Integer,
                    CultureInfo.InvariantCulture,
                    out end))
            {
                throw new InvalidDataException(
                    string.Format("Invalid locus '{0}'; expected chromosome:start-end", locus));
            }

            return new Genomics.Location
            {
                Name = name,
                Chromosome = locus.Substring(0, colon),
                Start = start,
                End = end,
            };
        }

        /// <summary>
        /// Parses a numeric column, accepting the literals "inf" and "-inf".
        /// </summary>
        /// <param name="arg">The raw field text.</param>
        /// <returns>The parsed value.</returns>
        private static double ParseDouble(string arg)
        {
            if (arg == "inf")
            {
                return double.PositiveInfinity;
            }

            if (arg == "-inf")
            {
                return double.NegativeInfinity;
            }

            // The file is machine-written with '.' as decimal separator; parsing with the
            // current culture would misread or reject values on comma-decimal systems.
            return double.Parse(arg, CultureInfo.InvariantCulture);
        }

        /// <summary>
        /// Drops everything from the first '.' onwards for identifiers starting with "ENST" or "ENSG";
        /// other identifiers are returned unchanged.
        /// </summary>
        /// <param name="id">The identifier to normalize.</param>
        /// <returns>The identifier without its suffix.</returns>
        private static string RemoveEnsemblSuffix(string id)
        {
            if (EnsemblIdFormat.IsMatch(id))
            {
                return id.Split('.')[0];
            }

            return id;
        }
    }
}