//-------------------------------------------------------------------------------- // // Copyright © The University of Queensland, 2012-2014. All rights reserved. // // License: //-------------------------------------------------------------------------------- namespace Data { using System; using System.Collections.Generic; using System.Linq; using System.IO; using System.Text.RegularExpressions; using Shared; using Genomics; /// /// Calculations that roundtrip from C# -> R -> C#. /// public static class Calculations { /// /// Convert z scores to log pvalues using R /// /// The to log pvalue. /// The z coordinate. public static double[] ZscoreToLogPvalue(IEnumerable z) { string dataFile = Tables.ToNsvFile(z); //string scriptName = "./zscore.R"; string scriptName = Path.GetTempFileName() + ".R"; string outputFile = scriptName + "out.tsv"; R.WriteAndRunScript(new string[] { R.ReadColumn(R.Variables.X, dataFile), string.Format("{0} <- pnorm(-abs({1}), log=T) + log(2)", R.Variables.D, R.Variables.X), R.WriteCsv(R.Variables.D, outputFile), }, scriptName); return R.FromCsvFile(outputFile); } /// /// Convert z scores to bonferroni corrected pvalues using R /// /// The to log pvalue. /// The z coordinate. public static double[] ZscoreToCorrectedPvalue(double[] z, int n) { //string dataFile = "./zdatac.tsv"; string dataFile = Path.GetTempFileName() + ".tsv"; using (TextWriter tw = Helpers.CreateStreamWriter(dataFile)) { tw.WriteLine(string.Join("\n", z.Select(x => -Math.Abs(x)))); } //string scriptName = "./zscorec.R"; string scriptName = Path.GetTempFileName() + ".R"; using (TextWriter tw = Helpers.CreateStreamWriter(scriptName)) { tw.WriteLine("x <- read.table('" + dataFile.Replace(@"\", @"\\") + "')"); tw.WriteLine("d <- p.adjust(exp(pnorm(x[,1], log=T) + log(2)), method=\"bonferroni\")"); tw.WriteLine("for (i in 1:dim(x)[1]) { print(d[i]) }"); //tw.WriteLine(string.Join("\n", z.Select(x => string.Format("bcp = p.adjust(exp(pnorm(" + -Math.Abs(x) + ", log=T) + log(2)), method=\"bonferroni\", n=" + n + "); print(sprintf(\"bcp=%e=\", bcp));")))); } string outputFile = Path.GetTempFileName(); //string outputFile = scriptName + "out"; R.RunScript(scriptName, outputFile); Regex lp = new Regex("\\[1\\] "); using (TextReader tr = new StreamReader(outputFile)) { return tr.ReadToEnd().Split('\n').Where(line => lp.IsMatch(line)) .Select(line => { return double.Parse(line.Split(' ')[1].Trim()); }).ToArray(); } } /// /// Performs a sign test on the first column of a matrix relative to the other columns /// /// The ranked matrix. /// Matrix file name. public static double[] TestRankedMatrix(string matrixFileName) { string scriptName = matrixFileName + ".R"; using (TextWriter tw = Helpers.CreateStreamWriter(scriptName)) { Regression.WriteSource(tw); tw.WriteLine("test.ranked.matrix('" + matrixFileName + "')"); } string outputFile = scriptName + "out"; R.RunScript(scriptName, outputFile); Regex lp = new Regex("\\[1\\] "); using (TextReader tr = new StreamReader(outputFile)) { return tr.ReadToEnd().Split('\n').Where(line => lp.IsMatch(line)) .Select(line => { //Console.WriteLine(line); return double.Parse(line.Split(' ')[1]); }).ToArray(); } } public static double TestSetOverlap(int overlap, int sample1, int sample2, int total) { int[] contingency = { overlap, sample1 - overlap, sample2 - overlap, total - sample1 - sample2 + overlap }; string scriptName = Path.GetTempFileName() + ".R"; using (TextWriter tw = Helpers.CreateStreamWriter(scriptName)) { tw.WriteLine("print(fisher.test(matrix(c(" + string.Join(",", contingency) + "), 2, 2))$p.value)"); } string outputFile = scriptName + "out"; R.RunScript(scriptName, outputFile); Regex lp = new Regex("\\[1\\] "); using (TextReader tr = new StreamReader(outputFile)) { return tr.ReadToEnd().Split('\n').Where(line => lp.IsMatch(line)) .Select(line => { //Console.WriteLine(line); return double.Parse(line.Split(' ')[1]); }).First(); } } public enum TestSide { TwoSided, Less, Greater, } public class TestResult { public double TestStatistic { get; set; } public double Pvalue { get; set; } } public static Dictionary Alternatives = new Dictionary { { TestSide.TwoSided, "'two.sided'" }, { TestSide.Less, "'less'" }, { TestSide.Greater, "'greater'" }, }; /// /// Kolmogorov-Smirnov p-value /// /// The test. /// Table file. /// Data column. /// Factor column. /// Factor1. /// Factor2. public static TestResult KSTest( TestSide side, string tableFile, string dataColumn, string factorColumn, string factor1, string factor2) { string tempFile = Path.GetTempFileName(); string pvalueFile = tempFile + ".tsv"; R.WriteAndRunScript(new string[] { R.ReadTable(R.Variables.X, tableFile, true), "y <- x[which(x$Confidence == 'Linked'),]", R.FactorColumn("y", factorColumn), string.Format("x1 <- y${0}[which(y${1} == '{2}')]", dataColumn, factorColumn, factor1), string.Format("x2 <- y${0}[which(y${1} == '{2}')]", dataColumn, factorColumn, factor2), string.Format("result <- ks.test(x1,x2,alternative={0})", Alternatives[side]), "m <- matrix(c(result$statistic, result$p.value), 1, 2)", R.WriteTable("m", pvalueFile) }, tempFile + ".R"); var dataLine = R.ReadDataLine(pvalueFile); return new TestResult { TestStatistic = dataLine[0], Pvalue = dataLine[1] }; } } }