In [1]:
import pandas as pd
import numpy as np
from plotnine import *
import matplotlib
# Use font type 42 for both the PDF and PostScript backends so that text in
# exported figures is embedded as TrueType rather than matplotlib's default.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})
In [2]:
# Load the tab-separated table of per-experiment metrics and model values,
# then show its column names so the plots below can refer to them.
experiment_pd = pd.read_csv(
    "alpha_beta_model_values.tsv",
    delimiter="\t",
)
experiment_pd.columns
Out[2]:
Index(['entity:experiment_id', 'target', 'tissue_name', 'number_of_peaks',
       'auprc-run_id_3', 'auprc_wo_bias-run_id_3', 'auroc-run_id_3',
       'auroc_wo_bias-run_id_3', 'jsd-run_id_3', 'jsd_all_peaks-run_id_3',
       'jsd_all_peaks_wo_bias-run_id_3', 'jsd_wo_bias-run_id_3',
       'number_of_peaks_test_chroms_split0', 'pearson-run_id_3',
       'pearson_all_peaks-run_id_3', 'pearson_all_peaks_wo_bias-run_id_3',
       'pearson_with_control', 'pearson_with_control_all_peaks',
       'pearson_wo_bias-run_id_3', 'spearman-run_id_3',
       'spearman_all_peaks-run_id_3', 'spearman_all_peaks_wo_bias-run_id_3',
       'spearman_with_control', 'spearman_with_control_all_peaks',
       'spearman_wo_bias-run_id_3', 'protein_tag', 'sample_summary',
       'model_alpha-run_id_3', 'model_beta-run_id_3',
       'model_bias_term-run_id_3', 'primary_log2_fold_change-run_id_3',
       'primary_log2_fold_change_rc-run_id_3'],
      dtype='object')
In [3]:
# Scatter of the 'model_alpha-run_id_3' column against 'pearson_with_control';
# point fill is mapped to 'number_of_peaks' and points are drawn translucent.
(
    ggplot(
        data=experiment_pd,
        mapping=aes(x='pearson_with_control', y='model_alpha-run_id_3'),
    )
    + geom_point(mapping=aes(fill='number_of_peaks'), alpha=0.2)
    + labs(
        x="pearson_with_control",
        y="alpha",
        title="Comparison of pearson_with_control with alpha",
    )
    + theme_classic()
)
/users/vir/anaconda3/envs/basepairmodels_latest/lib/python3.7/site-packages/plotnine/layer.py:401: PlotnineWarning: geom_point : Removed 43 rows containing missing values.
Out[3]:
<ggplot: (-9223363243324266864)>
In [4]:
# Scatter of the 'model_beta-run_id_3' column against 'pearson_with_control';
# point fill is mapped to 'number_of_peaks' and points are drawn translucent.
(
    ggplot(
        data=experiment_pd,
        mapping=aes(x='pearson_with_control', y='model_beta-run_id_3'),
    )
    + geom_point(mapping=aes(fill='number_of_peaks'), alpha=0.2)
    + labs(
        x="pearson_with_control",
        y="beta",
        title="Comparison of pearson_with_control with beta",
    )
    + theme_classic()
)
/users/vir/anaconda3/envs/basepairmodels_latest/lib/python3.7/site-packages/plotnine/layer.py:401: PlotnineWarning: geom_point : Removed 43 rows containing missing values.
Out[4]:
<ggplot: (-9223363243325001938)>
In [5]:
# Same comparison as for 'pearson_with_control', but using the
# 'pearson_with_control_all_peaks' column on the x axis, against alpha.
(
    ggplot(
        data=experiment_pd,
        mapping=aes(x='pearson_with_control_all_peaks', y='model_alpha-run_id_3'),
    )
    + geom_point(mapping=aes(fill='number_of_peaks'), alpha=0.2)
    + labs(
        x="pearson_with_control_all_peaks",
        y="alpha",
        title="Comparison of pearson_with_control_all_peaks with alpha",
    )
    + theme_classic()
)
/users/vir/anaconda3/envs/basepairmodels_latest/lib/python3.7/site-packages/plotnine/layer.py:401: PlotnineWarning: geom_point : Removed 43 rows containing missing values.
Out[5]:
<ggplot: (8793529787894)>
In [6]:
# Scatter of the 'model_beta-run_id_3' column against
# 'pearson_with_control_all_peaks', with fill mapped to 'number_of_peaks'.
(
    ggplot(
        data=experiment_pd,
        mapping=aes(x='pearson_with_control_all_peaks', y='model_beta-run_id_3'),
    )
    + geom_point(mapping=aes(fill='number_of_peaks'), alpha=0.2)
    + labs(
        x="pearson_with_control_all_peaks",
        y="beta",
        title="Comparison of pearson_with_control_all_peaks with beta",
    )
    + theme_classic()
)
/users/vir/anaconda3/envs/basepairmodels_latest/lib/python3.7/site-packages/plotnine/layer.py:401: PlotnineWarning: geom_point : Removed 43 rows containing missing values.
Out[6]:
<ggplot: (8793526525767)>
In [7]:
# Repeat the beta vs. pearson_with_control_all_peaks scatter, restricted to
# rows whose 'number_of_peaks' exceeds a threshold. The threshold is named and
# included in the title so this figure can be told apart from the unfiltered
# plot of the same two columns.
min_number_of_peaks = 15000
(
    ggplot(
        data=experiment_pd[experiment_pd['number_of_peaks'] > min_number_of_peaks],
        mapping=aes(x='pearson_with_control_all_peaks', y='model_beta-run_id_3'),
    )
    + geom_point(mapping=aes(fill='number_of_peaks'), alpha=0.2)
    + labs(
        x="pearson_with_control_all_peaks",
        y="beta",
        title=(
            "Comparison of pearson_with_control_all_peaks with beta "
            f"(number_of_peaks > {min_number_of_peaks})"
        ),
    )
    + theme_classic()
)
Out[7]:
<ggplot: (8793526446056)>
In [8]:
# Keep only rows that have a 'model_beta-run_id_3' value and renumber the
# index from 0; every later cell sees this filtered frame.
experiment_pd = (
    experiment_pd
    .dropna(subset=['model_beta-run_id_3'])
    .reset_index(drop=True)
)
In [9]:
# Distribution of the 'model_beta-run_id_3' column. No bins/binwidth is passed,
# so plotnine chooses the number of bins itself.
(
    ggplot(data=experiment_pd, mapping=aes(x='model_beta-run_id_3'))
    + geom_histogram()
    + labs(
        x="beta",
        y="frequency",
        title="histogram of beta",
    )
    + theme_classic()
)
/users/vir/anaconda3/envs/basepairmodels_latest/lib/python3.7/site-packages/plotnine/stats/stat_bin.py:95: PlotnineWarning: 'stat_bin()' using 'bins = 31'. Pick better value with 'binwidth'.
Out[9]:
<ggplot: (8793526364045)>
In [10]:
# Distribution of the 'model_alpha-run_id_3' column. No bins/binwidth is
# passed, so plotnine chooses the number of bins itself.
(
    ggplot(data=experiment_pd, mapping=aes(x='model_alpha-run_id_3'))
    + geom_histogram()
    + labs(
        x="alpha",
        y="frequency",
        title="histogram of alpha",
    )
    + theme_classic()
)
/users/vir/anaconda3/envs/basepairmodels_latest/lib/python3.7/site-packages/plotnine/stats/stat_bin.py:95: PlotnineWarning: 'stat_bin()' using 'bins = 78'. Pick better value with 'binwidth'.
Out[10]:
<ggplot: (-9223363243328431374)>
In [11]:
# Plot the two model columns against each other: 'model_alpha-run_id_3' on x,
# 'model_beta-run_id_3' on y, with fill mapped to 'number_of_peaks'.
(
    ggplot(
        data=experiment_pd,
        mapping=aes(x='model_alpha-run_id_3', y='model_beta-run_id_3'),
    )
    + geom_point(mapping=aes(fill='number_of_peaks'), alpha=0.2)
    + labs(
        x="alpha",
        y="beta",
        title="Comparison of alpha with beta",
    )
    + theme_classic()
)
Out[11]:
<ggplot: (8793526312126)>
In [ ]: