import pandas as pd
import numpy as np
from plotnine import *
import matplotlib
# Embed text as Type 42 (TrueType) fonts in PDF and PostScript output so
# that labels remain editable text when figures are opened in a vector editor.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
})
# Load the tab-separated table with one row per experiment; it holds the
# evaluation metrics and the fitted model parameters plotted below.
experiment_pd = pd.read_csv(
    "alpha_beta_model_values.tsv",
    sep="\t",
)
# Bare expression: in a notebook cell this displays the available column names.
experiment_pd.columns
Index(['entity:experiment_id', 'target', 'tissue_name', 'number_of_peaks', 'auprc-run_id_3', 'auprc_wo_bias-run_id_3', 'auroc-run_id_3', 'auroc_wo_bias-run_id_3', 'jsd-run_id_3', 'jsd_all_peaks-run_id_3', 'jsd_all_peaks_wo_bias-run_id_3', 'jsd_wo_bias-run_id_3', 'number_of_peaks_test_chroms_split0', 'pearson-run_id_3', 'pearson_all_peaks-run_id_3', 'pearson_all_peaks_wo_bias-run_id_3', 'pearson_with_control', 'pearson_with_control_all_peaks', 'pearson_wo_bias-run_id_3', 'spearman-run_id_3', 'spearman_all_peaks-run_id_3', 'spearman_all_peaks_wo_bias-run_id_3', 'spearman_with_control', 'spearman_with_control_all_peaks', 'spearman_wo_bias-run_id_3', 'protein_tag', 'sample_summary', 'model_alpha-run_id_3', 'model_beta-run_id_3', 'model_bias_term-run_id_3', 'primary_log2_fold_change-run_id_3', 'primary_log2_fold_change_rc-run_id_3'], dtype='object')
# Scatter plot of the fitted alpha (y) against pearson_with_control (x),
# with point fill mapped to number_of_peaks. Rows with missing x/y values
# are dropped by plotnine, which emits a warning when that happens.
(
    ggplot(
        experiment_pd,
        aes(x='pearson_with_control', y='model_alpha-run_id_3'),
    )
    + geom_point(aes(fill='number_of_peaks'), alpha=0.2)
    + labs(
        x="pearson_with_control",
        y="alpha",
        title="Comparison of pearson_with_control with alpha",
    )
    + theme_classic()
)
/users/vir/anaconda3/envs/basepairmodels_latest/lib/python3.7/site-packages/plotnine/layer.py:401: PlotnineWarning: geom_point : Removed 43 rows containing missing values.
<ggplot: (-9223363243324266864)>
# Scatter plot of the fitted beta (y) against pearson_with_control (x),
# with point fill mapped to number_of_peaks. Rows with missing x/y values
# are dropped by plotnine, which emits a warning when that happens.
(
    ggplot(
        experiment_pd,
        aes(x='pearson_with_control', y='model_beta-run_id_3'),
    )
    + geom_point(aes(fill='number_of_peaks'), alpha=0.2)
    + labs(
        x="pearson_with_control",
        y="beta",
        title="Comparison of pearson_with_control with beta",
    )
    + theme_classic()
)
/users/vir/anaconda3/envs/basepairmodels_latest/lib/python3.7/site-packages/plotnine/layer.py:401: PlotnineWarning: geom_point : Removed 43 rows containing missing values.
<ggplot: (-9223363243325001938)>
# Scatter plot of the fitted alpha (y) against
# pearson_with_control_all_peaks (x), with point fill mapped to
# number_of_peaks. Rows with missing x/y values are dropped by plotnine,
# which emits a warning when that happens.
(
    ggplot(
        experiment_pd,
        aes(x='pearson_with_control_all_peaks', y='model_alpha-run_id_3'),
    )
    + geom_point(aes(fill='number_of_peaks'), alpha=0.2)
    + labs(
        x="pearson_with_control_all_peaks",
        y="alpha",
        title="Comparison of pearson_with_control_all_peaks with alpha",
    )
    + theme_classic()
)
/users/vir/anaconda3/envs/basepairmodels_latest/lib/python3.7/site-packages/plotnine/layer.py:401: PlotnineWarning: geom_point : Removed 43 rows containing missing values.
<ggplot: (8793529787894)>
# Scatter plot of the fitted beta (y) against
# pearson_with_control_all_peaks (x), with point fill mapped to
# number_of_peaks. Rows with missing x/y values are dropped by plotnine,
# which emits a warning when that happens.
(
    ggplot(
        experiment_pd,
        aes(x='pearson_with_control_all_peaks', y='model_beta-run_id_3'),
    )
    + geom_point(aes(fill='number_of_peaks'), alpha=0.2)
    + labs(
        x="pearson_with_control_all_peaks",
        y="beta",
        title="Comparison of pearson_with_control_all_peaks with beta",
    )
    + theme_classic()
)
/users/vir/anaconda3/envs/basepairmodels_latest/lib/python3.7/site-packages/plotnine/layer.py:401: PlotnineWarning: geom_point : Removed 43 rows containing missing values.
<ggplot: (8793526525767)>
# Scatter plot of the fitted beta (y) against
# pearson_with_control_all_peaks (x), restricted to rows whose
# number_of_peaks exceeds 15000. The title states the filter so this figure
# can be told apart from the plot of the full table, which otherwise uses
# the same axes and the same title text.
(
    ggplot(
        experiment_pd[experiment_pd['number_of_peaks'] > 15000],
        aes(x='pearson_with_control_all_peaks', y='model_beta-run_id_3'),
    )
    + geom_point(aes(fill='number_of_peaks'), alpha=0.2)
    + xlab("pearson_with_control_all_peaks")
    + ylab("beta")
    + labs(
        title=(
            "Comparison of pearson_with_control_all_peaks with beta "
            "(number_of_peaks > 15000)"
        )
    )
    + theme_classic()
)
<ggplot: (8793526446056)>
# Discard rows that have no value in 'model_beta-run_id_3' and renumber the
# index from 0; every statement after this one sees only the remaining rows.
experiment_pd = (
    experiment_pd
    .dropna(subset=['model_beta-run_id_3'])
    .reset_index(drop=True)
)
# Histogram of the fitted beta values. No bin count or bin width is passed,
# so plotnine picks the binning itself and warns about it.
(
    ggplot(experiment_pd, aes(x='model_beta-run_id_3'))
    + geom_histogram()
    + labs(
        x="beta",
        y="frequency",
        title="histogram of beta",
    )
    + theme_classic()
)
/users/vir/anaconda3/envs/basepairmodels_latest/lib/python3.7/site-packages/plotnine/stats/stat_bin.py:95: PlotnineWarning: 'stat_bin()' using 'bins = 31'. Pick better value with 'binwidth'.
<ggplot: (8793526364045)>
# Histogram of the fitted alpha values. No bin count or bin width is passed,
# so plotnine picks the binning itself and warns about it.
(
    ggplot(experiment_pd, aes(x='model_alpha-run_id_3'))
    + geom_histogram()
    + labs(
        x="alpha",
        y="frequency",
        title="histogram of alpha",
    )
    + theme_classic()
)
/users/vir/anaconda3/envs/basepairmodels_latest/lib/python3.7/site-packages/plotnine/stats/stat_bin.py:95: PlotnineWarning: 'stat_bin()' using 'bins = 78'. Pick better value with 'binwidth'.
<ggplot: (-9223363243328431374)>
# Scatter plot of the fitted beta (y) against the fitted alpha (x), with
# point fill mapped to number_of_peaks.
(
    ggplot(
        experiment_pd,
        aes(x='model_alpha-run_id_3', y='model_beta-run_id_3'),
    )
    + geom_point(aes(fill='number_of_peaks'), alpha=0.2)
    + labs(
        x="alpha",
        y="beta",
        title="Comparison of alpha with beta",
    )
    + theme_classic()
)
<ggplot: (8793526312126)>