//--------------------------------------------------------------------------------
//
// Copyright © The University of Queensland, 2012-2014. All rights reserved.
//
// License:
//--------------------------------------------------------------------------------
namespace Data
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.IO;
using System.Text.RegularExpressions;
using Shared;
using Genomics;
///
/// Calculations that roundtrip from C# -> R -> C#.
///
public static class Calculations
{
///
/// Convert z scores to log pvalues using R
///
/// The to log pvalue.
/// The z coordinate.
public static double[] ZscoreToLogPvalue(IEnumerable z)
{
string dataFile = Tables.ToNsvFile(z);
//string scriptName = "./zscore.R";
string scriptName = Path.GetTempFileName() + ".R";
string outputFile = scriptName + "out.tsv";
R.WriteAndRunScript(new string[]
{
R.ReadColumn(R.Variables.X, dataFile),
string.Format("{0} <- pnorm(-abs({1}), log=T) + log(2)", R.Variables.D, R.Variables.X),
R.WriteCsv(R.Variables.D, outputFile),
},
scriptName);
return R.FromCsvFile(outputFile);
}
///
/// Convert z scores to bonferroni corrected pvalues using R
///
/// The to log pvalue.
/// The z coordinate.
public static double[] ZscoreToCorrectedPvalue(double[] z, int n)
{
//string dataFile = "./zdatac.tsv";
string dataFile = Path.GetTempFileName() + ".tsv";
using (TextWriter tw = Helpers.CreateStreamWriter(dataFile))
{
tw.WriteLine(string.Join("\n", z.Select(x => -Math.Abs(x))));
}
//string scriptName = "./zscorec.R";
string scriptName = Path.GetTempFileName() + ".R";
using (TextWriter tw = Helpers.CreateStreamWriter(scriptName))
{
tw.WriteLine("x <- read.table('" + dataFile.Replace(@"\", @"\\") + "')");
tw.WriteLine("d <- p.adjust(exp(pnorm(x[,1], log=T) + log(2)), method=\"bonferroni\")");
tw.WriteLine("for (i in 1:dim(x)[1]) { print(d[i]) }");
//tw.WriteLine(string.Join("\n", z.Select(x => string.Format("bcp = p.adjust(exp(pnorm(" + -Math.Abs(x) + ", log=T) + log(2)), method=\"bonferroni\", n=" + n + "); print(sprintf(\"bcp=%e=\", bcp));"))));
}
string outputFile = Path.GetTempFileName();
//string outputFile = scriptName + "out";
R.RunScript(scriptName, outputFile);
Regex lp = new Regex("\\[1\\] ");
using (TextReader tr = new StreamReader(outputFile))
{
return tr.ReadToEnd().Split('\n').Where(line => lp.IsMatch(line))
.Select(line =>
{
return double.Parse(line.Split(' ')[1].Trim());
}).ToArray();
}
}
///
/// Performs a sign test on the first column of a matrix relative to the other columns
///
/// The ranked matrix.
/// Matrix file name.
public static double[] TestRankedMatrix(string matrixFileName)
{
string scriptName = matrixFileName + ".R";
using (TextWriter tw = Helpers.CreateStreamWriter(scriptName))
{
Regression.WriteSource(tw);
tw.WriteLine("test.ranked.matrix('" + matrixFileName + "')");
}
string outputFile = scriptName + "out";
R.RunScript(scriptName, outputFile);
Regex lp = new Regex("\\[1\\] ");
using (TextReader tr = new StreamReader(outputFile))
{
return tr.ReadToEnd().Split('\n').Where(line => lp.IsMatch(line))
.Select(line =>
{
//Console.WriteLine(line);
return double.Parse(line.Split(' ')[1]);
}).ToArray();
}
}
public static double TestSetOverlap(int overlap, int sample1, int sample2, int total)
{
int[] contingency = { overlap, sample1 - overlap, sample2 - overlap, total - sample1 - sample2 + overlap };
string scriptName = Path.GetTempFileName() + ".R";
using (TextWriter tw = Helpers.CreateStreamWriter(scriptName))
{
tw.WriteLine("print(fisher.test(matrix(c(" + string.Join(",", contingency) + "), 2, 2))$p.value)");
}
string outputFile = scriptName + "out";
R.RunScript(scriptName, outputFile);
Regex lp = new Regex("\\[1\\] ");
using (TextReader tr = new StreamReader(outputFile))
{
return tr.ReadToEnd().Split('\n').Where(line => lp.IsMatch(line))
.Select(line =>
{
//Console.WriteLine(line);
return double.Parse(line.Split(' ')[1]);
}).First();
}
}
public enum TestSide
{
TwoSided,
Less,
Greater,
}
public class TestResult
{
public double TestStatistic { get; set; }
public double Pvalue { get; set; }
}
public static Dictionary Alternatives = new Dictionary
{
{ TestSide.TwoSided, "'two.sided'" },
{ TestSide.Less, "'less'" },
{ TestSide.Greater, "'greater'" },
};
///
/// Kolmogorov-Smirnov p-value
///
/// The test.
/// Table file.
/// Data column.
/// Factor column.
/// Factor1.
/// Factor2.
public static TestResult KSTest(
TestSide side,
string tableFile,
string dataColumn,
string factorColumn,
string factor1,
string factor2)
{
string tempFile = Path.GetTempFileName();
string pvalueFile = tempFile + ".tsv";
R.WriteAndRunScript(new string[]
{
R.ReadTable(R.Variables.X, tableFile, true),
"y <- x[which(x$Confidence == 'Linked'),]",
R.FactorColumn("y", factorColumn),
string.Format("x1 <- y${0}[which(y${1} == '{2}')]", dataColumn, factorColumn, factor1),
string.Format("x2 <- y${0}[which(y${1} == '{2}')]", dataColumn, factorColumn, factor2),
string.Format("result <- ks.test(x1,x2,alternative={0})", Alternatives[side]),
"m <- matrix(c(result$statistic, result$p.value), 1, 2)",
R.WriteTable("m", pvalueFile)
},
tempFile + ".R");
var dataLine = R.ReadDataLine(pvalueFile);
return new TestResult
{
TestStatistic = dataLine[0],
Pvalue = dataLine[1]
};
}
}
}