Goal

  • analyze the pairwise features for Nanog

Conclusions

  • there is almost nothing going on in the pairwise interactions
In [1]:
from basepair.imports import *
ddir = get_data_dir()
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
In [2]:
from basepair.plot.profiles import extract_signal
from basepair.math import softmax
from basepair.plot.heatmaps import heatmap_stranded_profile, multiple_heatmap_stranded_profile
from basepair.plot.profiles import  plot_stranded_profile, multiple_plot_stranded_profile

import plotnine
from plotnine import *

import statsmodels.api as sm
import statsmodels.formula.api as smf
In [338]:
from basepair.modisco.results import Seqlet, resize_seqlets
from basepair.modisco.core import dfi2seqlets, annotate_profile
from basepair.cli.modisco import load_profiles
In [337]:
model_dir = Path(f"{ddir}/processed/chipnexus/exp/models/oct-sox-nanog-klf/models/n_dil_layers=9/")
modisco_pdir = model_dir / "modisco/by_peak_tasks/weighted/"
In [34]:
# Load the data
d = HDF5Reader(model_dir / "grad.all.h5")
d.open()
In [6]:
dfi = pd.read_csv(f"{modisco_pdir}/Nanog/instances.tsv.gz", sep='\t')
In [7]:
dfi.tail()
Out[7]:
pattern example_idx pattern_start pattern_end strand pattern_len pattern_center match_weighted match_weighted_p match_weighted_cat ... match/Sox2 imp/Klf4 imp/Nanog imp/Oct4 imp/Sox2 example_chrom example_start example_end example_strand example_interval_from_task
295017 metacluster_0/pattern_0 18016 552 568 - 16 560 0.142941 0.193969 low ... 0.160996 0.101324 0.330139 0.052185 0.092031 chr3 10381329 10382329 * Nanog
295018 metacluster_0/pattern_0 18016 591 607 + 16 599 0.147612 0.193969 low ... 0.128601 0.078536 0.311810 0.050221 0.088571 chr3 10381329 10382329 * Nanog
295019 metacluster_0/pattern_0 18016 689 705 - 16 697 0.148917 0.193969 low ... 0.199165 0.076908 0.100458 0.134652 0.110449 chr3 10381329 10382329 * Nanog
295020 metacluster_0/pattern_0 18016 695 711 + 16 703 0.205385 0.215127 low ... 0.215987 0.079238 0.072323 0.128498 0.098791 chr3 10381329 10382329 * Nanog
295021 metacluster_0/pattern_0 18016 876 892 - 16 884 0.197814 0.215127 low ... 0.246796 0.025204 0.014320 0.031117 0.021162 chr3 10381329 10382329 * Nanog

5 rows × 33 columns

In [9]:
seqlets_medium = dfi2seqlets(dfi[dfi.match_weighted_cat == 'medium'])
In [10]:
dfi.match_weighted.plot.hist(100);
In [11]:
dfi.match_weighted_p.plot.hist(10);
In [12]:
dfi.match_weighted_cat.value_counts().plot.bar();
In [13]:
dfi.imp_weighted_p.plot.hist(100);
In [14]:
dfi.imp_weighted_cat.value_counts().plot.bar();
In [15]:
dfi.match_weighted_p.plot.hist(10);
In [16]:
from basepair.stats import low_medium_high

Re-compute the motif instances

In [57]:
p = mr.get_pattern(pattern).trim_seq_ic(0.08)
In [76]:
dfi.head()
Out[76]:
pattern example_idx pattern_start pattern_end strand pattern_len pattern_center match_weighted match_weighted_p match_weighted_cat ... match/Sox2 imp/Klf4 imp/Nanog imp/Oct4 imp/Sox2 example_chrom example_start example_end example_strand example_interval_from_task
0 metacluster_0/pattern_0 0 75 91 + 16 83 0.175556 0.193969 low ... 0.206333 0.050587 0.063537 0.090030 0.036130 chrX 143482572 143483572 * Nanog
1 metacluster_0/pattern_0 0 152 168 - 16 160 0.141040 0.193969 low ... 0.223384 0.032036 0.020173 0.023113 0.029687 chrX 143482572 143483572 * Nanog
2 metacluster_0/pattern_0 0 235 251 - 16 243 0.170543 0.193969 low ... 0.206882 0.018007 0.063500 0.021244 0.022853 chrX 143482572 143483572 * Nanog
3 metacluster_0/pattern_0 0 237 253 + 16 245 0.151329 0.193969 low ... 0.186484 0.017459 0.059497 0.021635 0.023506 chrX 143482572 143483572 * Nanog
4 metacluster_0/pattern_0 0 245 261 - 16 253 0.205689 0.215127 low ... 0.221023 0.020000 0.051363 0.035843 0.030362 chrX 143482572 143483572 * Nanog

5 rows × 33 columns

Nanog

In [86]:
task = "Nanog"
pattern = "metacluster_0/pattern_0"

Dev - compute the profile similarity

In [324]:
# load profiles
profiles = load_profiles(modisco_pdir, model_dir/'grad.all.h5')
In [ ]:
tasks = list(profiles)
In [345]:
dfi_anno = annotate_profile(dfi, mr, profiles)
  0%|          | 0/1 [00:00<?, ?it/s]
100%|██████████| 1/1 [05:48<00:00, 348.17s/it]
In [351]:
dfi_anno['Klf4/profile_match'].isnull().mean()
Out[351]:
0.026150592159228804
In [363]:
dfi_anno['match_weighted_cat'] = pd.Categorical(dfi_anno['match_weighted_cat'])
In [364]:
dfi_anno['imp_weighted_cat'] = pd.Categorical(dfi_anno['imp_weighted_cat'])
In [369]:
len(dfi_anno)
Out[369]:
295022
In [370]:
len(dfi)
Out[370]:
295022
In [392]:
dfif = dfi_anno[~np.isinf(dfi_anno['Oct4/profile_match'])]
In [393]:
dfif.groupby(['match_weighted_cat', 'imp_weighted_cat'])['Oct4/profile_match'].mean().plot.bar()
Out[393]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0b0bf3fac8>
In [371]:
ggplot(aes(x='match_weighted_cat', 
           color='imp_weighted_cat', 
           y='Oct4/profile_match'), dfi_anno.sample(100000)) + geom_boxplot()
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/plotnine/utils.py in match(v1, v2, nomatch, incomparables, start)
    139         with suppress(KeyError):
--> 140             lst[i] = lookup[x] + start
    141 

KeyError: nan

During handling of the above exception, another exception occurred:

KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-371-eed5ae626cd0> in <module>()
      1 ggplot(aes(x='match_weighted_cat', 
      2            color='imp_weighted_cat',
----> 3            y='Oct4/profile_match'), dfi_anno.sample(100000)) + geom_boxplot()

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/IPython/core/displayhook.py in __call__(self, result)
    255             self.start_displayhook()
    256             self.write_output_prompt()
--> 257             format_dict, md_dict = self.compute_format_data(result)
    258             self.update_user_ns(result)
    259             self.fill_exec_result(result)

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/IPython/core/displayhook.py in compute_format_data(self, result)
    149 
    150         """
--> 151         return self.shell.display_formatter.format(result)
    152 
    153     # This can be set to True by the write_output_prompt method in a subclass

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/IPython/core/formatters.py in format(self, obj, include, exclude)
    178             md = None
    179             try:
--> 180                 data = formatter(obj)
    181             except:
    182                 # FIXME: log the exception

<decorator-gen-10> in __call__(self, obj)

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/IPython/core/formatters.py in catch_format_error(method, self, *args, **kwargs)
    222     """show traceback on failed format call"""
    223     try:
--> 224         r = method(self, *args, **kwargs)
    225     except NotImplementedError:
    226         # don't warn on NotImplementedErrors

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/IPython/core/formatters.py in __call__(self, obj)
    700                 type_pprinters=self.type_printers,
    701                 deferred_pprinters=self.deferred_printers)
--> 702             printer.pretty(obj)
    703             printer.flush()
    704             return stream.getvalue()

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/IPython/lib/pretty.py in pretty(self, obj)
    398                         if cls is not object \
    399                                 and callable(cls.__dict__.get('__repr__')):
--> 400                             return _repr_pprint(obj, self, cycle)
    401 
    402             return _default_pprint(obj, self, cycle)

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/IPython/lib/pretty.py in _repr_pprint(obj, p, cycle)
    693     """A pprint that just redirects to the normal repr function."""
    694     # Find newlines and replace them with p.break_()
--> 695     output = repr(obj)
    696     for idx,output_line in enumerate(output.splitlines()):
    697         if idx:

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/plotnine/ggplot.py in __repr__(self)
     84         Print/show the plot
     85         """
---> 86         self.draw()
     87         plt.show()
     88         return '<ggplot: (%d)>' % self.__hash__()

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/plotnine/ggplot.py in draw(self, return_ggplot)
    177         # assign a default theme
    178         self = deepcopy(self)
--> 179         self._build()
    180 
    181         # If no theme we use the default

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/plotnine/ggplot.py in _build(self)
    274         # Compute aesthetics to produce data with generalised
    275         # variable names
--> 276         layers.compute_aesthetics(self)
    277 
    278         # Transform data using all scales

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/plotnine/layer.py in compute_aesthetics(self, plot)
     80     def compute_aesthetics(self, plot):
     81         for l in self:
---> 82             l.compute_aesthetics(plot)
     83 
     84     def compute_statistic(self, layout):

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/plotnine/layer.py in compute_aesthetics(self, plot)
    348             evaled['PANEL'] = data['PANEL']
    349 
--> 350         self.data = add_group(evaled)
    351 
    352     def compute_statistic(self, layout):

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/plotnine/layer.py in add_group(data)
    479         disc = discrete_columns(data, ignore=['label'])
    480         if disc:
--> 481             data['group'] = ninteraction(data[disc], drop=True)
    482         else:
    483             data['group'] = NO_GROUP

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/plotnine/utils.py in ninteraction(df, drop)
    288 
    289     if drop:
--> 290         return _id_var(res, drop)
    291     else:
    292         return res

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/plotnine/utils.py in _id_var(x, drop)
    319             levels = multitype_sort(set(x))
    320 
--> 321         lst = match(x, levels)
    322         lst = [item + 1 for item in lst]
    323 

~/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/plotnine/utils.py in match(v1, v2, nomatch, incomparables, start)
    138 
    139         with suppress(KeyError):
--> 140             lst[i] = lookup[x] + start
    141 
    142     return lst

KeyboardInterrupt: 
In [367]:
ggplot(aes(x='match_weighted_cat', 
           color='imp_weighted_cat', 
           y='Oct4/profile_match'), dfi_anno.sample(100000)) + geom_boxplot()
Out[367]:
<ggplot: (8730573773232)>
In [354]:
dfi_anno.head()
Out[354]:
Klf4/profile_match Klf4/profile_match_p Klf4/profile_counts Klf4/profile_counts_p Klf4/profile_max Klf4/profile_max_p Nanog/profile_match Nanog/profile_match_p Nanog/profile_counts Nanog/profile_counts_p ... match/Sox2 imp/Klf4 imp/Nanog imp/Oct4 imp/Sox2 example_chrom example_start example_end example_strand example_interval_from_task
0 inf NaN 0.0 1.0 0.0 1.0 inf NaN 1.0 5.0 ... 0.206333 0.050587 0.063537 0.090030 0.036130 chrX 143482572 143483572 * Nanog
1 inf NaN 1.0 1.0 1.0 1.0 inf NaN 0.0 5.0 ... 0.223384 0.032036 0.020173 0.023113 0.029687 chrX 143482572 143483572 * Nanog
2 inf NaN 0.0 1.0 0.0 1.0 inf NaN 1.0 5.0 ... 0.206882 0.018007 0.063500 0.021244 0.022853 chrX 143482572 143483572 * Nanog
3 inf NaN 0.0 1.0 0.0 1.0 inf NaN 1.0 5.0 ... 0.186484 0.017459 0.059497 0.021635 0.023506 chrX 143482572 143483572 * Nanog
4 inf NaN 0.0 1.0 0.0 1.0 inf NaN 1.0 5.0 ... 0.221023 0.020000 0.051363 0.035843 0.030362 chrX 143482572 143483572 * Nanog

5 rows × 57 columns

In [344]:
np.isinf(np.array([np.inf]))
Out[344]:
array([ True])
In [285]:
mr = ModiscoResult(modisco_pdir / f"{task}/modisco.h5")
mr.open()
seqlets = mr._get_seqlets(pattern, trim_frac=0.08)
seqlets = resize_seqlets(seqlets, 200, seqlen=profile_obs.shape[1])
# mr.close()
In [325]:
list(profiles)
Out[325]:
['Klf4', 'Nanog', 'Oct4', 'Sox2']
In [286]:
len(seqlets)
Out[286]:
4265
In [180]:
len(seqlets)
Out[180]:
4265
In [181]:
from basepair.cli.modisco import load_ranges, load_included_samples
In [182]:
ranges = load_ranges(modisco_pdir / task)
In [183]:
load_included_samples(modisco_pdir / task)
Out[183]:
array([False, False, False, ..., False, False, False])
In [184]:
include_samples = np.load(read_json(modisco_pdir / f"{task}/kwargs.json")["filter_npy"])
In [187]:
out_task = 'Oct4'
profile_obs = d.f[f'/targets/profile/{out_task}'][:][include_samples]
In [188]:
ds = DataSpec.load(model_dir / "dataspec.yaml")
In [273]:
seqlets = dfi2seqlets(dfi[(dfi.match_weighted_cat == 'medium')])
seqlets = resize_seqlets(seqlets, 200, seqlen=profile_obs.shape[1])
In [274]:
seqlets[:3]
Out[274]:
[Seqlet(seqname=1, start=317, end=517, name='metacluster_0/pattern_0', strand='+'),
 Seqlet(seqname=1, start=698, end=898, name='metacluster_0/pattern_0', strand='-'),
 Seqlet(seqname=3, start=322, end=522, name='metacluster_0/pattern_0', strand='+')]
In [291]:
seqlet_profile_obs = extract_signal(profile_obs, seqlets)
In [292]:
total_counts = seqlet_profile_obs.sum(axis=-1).sum(axis=-1)
sort_idx = np.argsort(-total_counts)
In [309]:
avg_profile = seqlet_profile_obs.mean(axis=0)
In [312]:
# now, compute the KL distance
In [310]:
plot_stranded_profile(avg_profile)
In [311]:
plot_stranded_profile(p.mean(axis=0))
In [300]:
# probabilities
p = seqlet_profile_obs[sort_idx] / seqlet_profile_obs[sort_idx].sum(axis=1, keepdims=True)

# drop NA's
notnan = ~np.any(np.any(np.isnan(p), axis=-1), axis=-1)
total_counts = seqlet_profile_obs[sort_idx].sum(axis=1)[notnan]
p = p[notnan]

seqlet_idx = np.array([s.seqname for s in seqlets])[notnan]
In [302]:
total_counts.shape
Out[302]:
(4228, 2)
In [279]:
# dropped 
print("Dropped", seqlet_profile_obs.shape[0] - p.shape[0], "profiles with only 0's")
Dropped 71 profiles with only 0's
In [280]:
p.shape
Out[280]:
(14169, 200, 2)
In [281]:
old_total_counts = total_counts
In [303]:
plt.scatter(total_counts[:,0], total_counts[:,1])
Out[303]:
<matplotlib.collections.PathCollection at 0x7f0bdbbe39b0>
In [282]:
plt.plot(total_counts.sum(axis=-1)[:400])
plt.plot(old_total_counts.sum(axis=-1)[:400], label='old')
plt.legend()
Out[282]:
<matplotlib.legend.Legend at 0x7f0b0ed17278>
In [283]:
heatmap_stranded_profile(p[:5000], figsize=(20,20))
In [272]:
heatmap_stranded_profile(p[:5000], figsize=(20,20))

1. Quantify the profile effect -> entropy and total counts

In [201]:
from scipy.stats import entropy
from scipy.special import  rel_entr, kl_div
In [202]:
# S(p_obs)
entropies = entropy(p.swapaxes(0,1)).sum(axis=-1)

# KL(p_obs, p_average)
kl = kl_div(p, p.mean(axis=0, keepdims=True)).mean(axis=-1).sum(axis=-1)
crossentropy = rel_entr(p, p.mean(axis=0, keepdims=True)).mean(axis=-1).sum(axis=-1)
In [204]:
fig = plt.figure(figsize=(13,5))
plt.subplot(131)
plt.plot(entropies);
plt.ylabel("entropy")
plt.xlabel("idx");
plt.subplot(132)
plt.plot(kl, entropies, ".");  # kl divergence and the entropy between the other factor are almost the same
plt.xlabel("kl")
plt.ylabel("Entropy");
plt.subplot(133)
plt.plot(crossentropy, kl, ".");  # kl divergence and the entropy between the other factor are almost the same
plt.xlabel("crossentropy")
plt.ylabel("Entropy");
In [205]:
fig=plt.figure(figsize=(10,4))
plt.subplot(121)
plt.plot(entropies, np.log(1+total_counts.mean(axis=-1)), ".", alpha=0.3);
plt.xlabel("Entropy")
plt.ylabel("log(1+ counts)");
plt.subplot(122)
plt.plot(entropies**2, np.log(1+total_counts.mean(axis=-1)), ".", alpha=0.3);
plt.xlabel(r"Entropy^2")
plt.ylabel("log(1+ counts)");

Conclusion

  • both metrics: counts and entropy are good for characterizing the signal

Question: How do others contribute counts?

Features

Count matrix

In [206]:
df = mr.seqlet_df_instances()
In [23]:
dfp = df.pivot_table("center", "seqname", "pattern", aggfunc=len, fill_value=0)
In [24]:
count_features = dfp.loc[seqlet_idx]
In [26]:
count_features[pattern].value_counts().plot.bar();
plt.xlabel("Number of occurences in the sequence")
plt.ylabel("Frequency");
In [28]:
count_features.iloc[:,count_features.columns!=pattern].sum().plot.bar(figsize=(20,5));
plt.ylabel("Sum");

Boxplot for each factor the change in entropy

In [29]:
def rename_pattern(p):
    if "metacluster" in p:
        return p.replace("metacluster_", "m").replace("/", "_").replace("pattern_", "p")
    else:
        return p
In [30]:
count_features.columns = [rename_pattern(p) for p in count_features.columns]
In [31]:
dfm = count_features.assign(entropy=entropies, counts=np.log10(1 + total_counts.mean(axis=-1)), 
                            example_idx=count_features.index).melt(id_vars=['entropy', 'counts', 'example_idx'], var_name="pattern")
In [32]:
dfmf = dfm.groupby("pattern").filter(lambda x: x.value.sum()> 10)
In [33]:
dfmf.value = pd.Categorical(dfmf.value)
dfm.value = pd.Categorical(dfm.value)
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/pandas/core/generic.py:3643: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value
In [45]:
plotnine.options.figure_size = (20,10)
ggplot(aes(x='value', y='entropy'), dfmf) + geom_boxplot() + facet_wrap("~pattern", ncol = 10, scales='free_x') + theme_bw()
Out[45]:
<ggplot: (8768024965668)>

Fit a model to determine the effects

In [35]:
def ols_formula(df, dependent_var, *excluded_cols):
    '''
    Generates the R style formula for statsmodels (patsy) given
    the dataframe, dependent variable and optional excluded columns
    as strings
    '''
    df_columns = list(df.columns.values)
    df_columns.remove(dependent_var)
    for col in excluded_cols:
        df_columns.remove(col)
    return dependent_var + ' ~ ' + ' + '.join(df_columns)
In [36]:
dm = count_features.assign(counts=np.log10(1+total_counts.sum(axis=-1)))
In [37]:
ols_formula(dm, "counts")
Out[37]:
'counts ~ m0_p0 + m0_p1 + m0_p10 + m0_p11 + m0_p12 + m0_p13 + m0_p14 + m0_p15 + m0_p2 + m0_p3 + m0_p4 + m0_p5 + m0_p6 + m0_p7 + m0_p8 + m0_p9 + m1_p0 + m1_p1 + m1_p2 + m1_p3 + m1_p4 + m1_p5 + m1_p6 + m1_p7 + m1_p8 + m10_p0 + m10_p1 + m2_p0 + m2_p1 + m2_p2 + m2_p3 + m2_p4 + m3_p0 + m3_p1 + m3_p2 + m3_p3 + m3_p4 + m3_p5 + m3_p6 + m3_p7 + m3_p8 + m4_p0 + m4_p1 + m4_p2 + m4_p3 + m4_p4 + m4_p5 + m4_p6 + m6_p0 + m6_p1 + m6_p2 + m6_p3 + m7_p0 + m7_p1 + m7_p2 + m7_p3 + m7_p4 + m7_p5 + m8_p0 + m8_p1'
In [38]:
results = smf.ols(ols_formula(dm, "counts"), data=dm).fit()
In [39]:
def tidy_ols(ols_reults):
    smry = results.summary()
    coef = smry.tables[1]
    return pd.DataFrame(coef.data[1:], columns=coef.data[0])
In [40]:
df_fit = tidy_ols(results)
In [41]:
# significant coefficients
df_fit_signif = df_fit[df_fit['P>|t|'].astype(float) < 0.05]
df_fit_signif = df_fit_signif[df_fit_signif[""] != "Intercept"]  # don't show the intercept
df_fit_signif.iloc[df_fit_signif['coef'].astype(float).abs().argsort()].iloc[::-1]  # sort by the effect size
Out[41]:
coef std err t P>|t| [0.025 0.975]
37 m3_p4 0.6035 0.237 2.542 0.011 0.138 1.069
15 m0_p8 0.3219 0.115 2.811 0.005 0.097 0.547
17 m1_p0 -0.1178 0.047 -2.528 0.012 -0.209 -0.026

Conclusions

Significant negative effect

  • m1_p0 - another Nanog motif

Significant positive effect

  • m3_p4 - Klf4 - homodimer - long motif (only 3 instances...)
  • m0_p8 - long motif (Oct4?)
In [42]:
pattern_short = rename_pattern(pattern)
single_motif_idx = count_features.index[count_features[pattern_short] == 1]
df_center = df[df.seqname.isin(single_motif_idx)].query(f"pattern == '{pattern}'")[['seqname', 'center']]
df_counts = pd.DataFrame({"seqname": seqlet_idx,
                          "log_counts": np.log10(1+total_counts.mean(axis=-1))})
dfd = pd.merge(df[df.pattern != pattern], df_center, on='seqname', suffixes=("_other", "_core"))
dfd['rel'] = dfd.center_other - dfd.center_core
dfd = dfd.merge(df_counts, on="seqname")
In [50]:
plotnine.options.figure_size = (20,20)
ggplot(aes(x="rel", y='log_counts'), dfd) + geom_point(alpha=0.5) + facet_wrap("~pattern", ncol = 5) + theme_bw() #+ xlim([-400, 400])
Out[50]:
<ggplot: (-9223363268808843695)>
In [49]:
plotnine.options.figure_size = (20,20)
ggplot(aes(x="rel", y='log_counts'), dfd) + geom_point(alpha=0.5) + facet_wrap("~pattern", ncol = 5) + theme_bw() + xlim([-70, 70])
/users/avsec/bin/anaconda3/envs/chipnexus/lib/python3.6/site-packages/plotnine/layer.py:450: UserWarning: geom_point : Removed 611 rows containing missing values.
  self.data = self.geom.handle_na(self.data)
Out[49]:
<ggplot: (-9223363268845628777)>

Conclusions

  • not much going on