from basepair.imports import *
from basepair.exp.chipnexus.motif_clustering import to_colors, preproc_motif_table, motif_table_long, scale
# Task names handed to `preproc_motif_table` together with the pattern table.
tasks = [
    'Oct4',
    'Sox2',
    'Klf4',
    'Nanog',
]
def load_df(modisco_run, min_n_seqlets=100):
    """Download the pattern table of a modisco run and annotate it.

    Args:
        modisco_run: name of the modisco run; it is interpolated into the
            URL of the ``pattern_table.csv`` file that gets downloaded.
        min_n_seqlets: rows whose ``'n seqlets'`` value is below this
            threshold are dropped from the returned table.

    Returns:
        The filtered table with two extra columns: ``'metacluster'`` (ordered
        categorical built from the integer found in the part of ``pattern``
        before the first underscore, with "m" removed) and
        ``'log n seqlets'`` (log10 of ``'n seqlets'``).
    """
    table = pd.read_csv(f"http://mitra.stanford.edu/kundaje/avsec/chipnexus/oct-sox-nanog-klf/models/n_dil_layers=9/modisco/{modisco_run}/pattern_table.csv")

    # The text before the first "_" of the pattern name carries the
    # metacluster id, prefixed with "m".
    pattern_prefix = table.pattern.str.split("_", expand=True)[0]
    metacluster_ids = pattern_prefix.str.replace("m", "").astype(int)
    table['metacluster'] = pd.Categorical(metacluster_ids, ordered=True)

    table['log n seqlets'] = np.log10(table['n seqlets'])

    # Keep only the patterns supported by enough seqlets.
    has_enough_seqlets = table['n seqlets'] >= min_n_seqlets
    return table[has_enough_seqlets]
# Load the pattern table once. The run name is bound to a variable first:
# the original first call referenced `modisco_run` before it was defined
# and its result was immediately replaced by the 'valid' table anyway.
modisco_run = 'valid'
df = load_df(modisco_run)

# Build the motif matrix plus its row/column annotation tables, then scale
# and transpose it for plotting.
dfx, row_df, col_df = preproc_motif_table(df, tasks)
x = scale(dfx).T

# Hierarchically clustered heatmap. `x` is transposed, so the column
# annotations colour its rows and the row annotations colour its columns.
g = sns.clustermap(
    x,
    row_colors=to_colors(col_df),
    col_colors=to_colors(row_df),
    method="weighted",
    figsize=(20, 10),
    cmap='RdBu_r',
    center=0,
)
# SpectralCoclustering is exported from `sklearn.cluster`; the former
# `sklearn.cluster.bicluster` module path is deprecated and no longer
# importable in newer scikit-learn releases.
from sklearn.cluster import SpectralCoclustering
# Imported before its first use (it used to be imported after the call).
from sklearn.datasets import make_biclusters
from sklearn.metrics import consensus_score

# Synthetic bi-cluster data set. Note that the model below is fitted on
# `x.values`, not on this synthetic matrix.
data, rows, columns = make_biclusters(
    shape=(300, 300), n_clusters=5, noise=5,
    shuffle=False, random_state=0)
data.shape
rows.shape

# Spectral co-clustering of the motif matrix.
model = SpectralCoclustering(n_clusters=5, random_state=0)
model.fit(x.values)

# Reorder rows and columns so that members of the same co-cluster are adjacent.
fit_data = x.iloc[np.argsort(model.row_labels_)]
fit_data = fit_data.iloc[:, np.argsort(model.column_labels_)]

# Heatmap in co-cluster order (no additional hierarchical clustering) ...
sns.clustermap(fit_data, col_cluster=False, figsize=(20, 10), row_cluster=False, cmap='RdBu_r', center=0)
# ... and the same matrix with hierarchical clustering applied on both axes.
sns.clustermap(fit_data, row_cluster=True, col_cluster=True, method="weighted", figsize=(20, 10), cmap='RdBu_r', center=0)
from sklearn.metrics import (adjusted_rand_score as ari,
normalized_mutual_info_score as nmi)
from coclust.coclustering import (CoclustMod, CoclustSpecMod, CoclustInfo)
from coclust.io.data_loading import load_doc_term_data
from coclust.evaluation.internal import best_modularity_partition
from coclust.evaluation.external import accuracy
from coclust.io.notebook import(input_with_default_int, input_with_default_str)
from coclust.visualization import (plot_max_modularities,
plot_intermediate_modularities,
plot_cluster_top_terms,
get_term_graph,
plot_cluster_sizes)
# Numbers of co-clusters to try (2 through 8) and the number of random
# initialisations used for each of them.
range_n_clusters = list(range(2, 9))
n_rand_init = 1

# Search over the candidate cluster counts; returns the best model together
# with the maximum modularities.
best_coclustMod_model, all_max_modularities = best_modularity_partition(
    x.values,
    nbr_clusters_range=range_n_clusters,
    n_rand_init=n_rand_init,
)

# Separately fit a modularity-based co-clustering with a fixed 5 clusters.
cocluster = CoclustMod(5)
cocluster.fit(x.values)
x.values.shape
from coclust.visualization import plot_reorganized_matrix

# Work on the raw ndarray behind `x`, with the fitted coclust model.
X = x.values
model = cocluster

# Row/column orderings that group members of the same co-cluster together.
# These are computed before being inspected; the original looked at
# `row_indices` / `col_indices` (and an undefined name `c`) before they
# were assigned.
row_indices = np.argsort(model.row_labels_)
col_indices = np.argsort(model.column_labels_)
len(col_indices)
X.shape

# Manually reorganised matrix: rows first, then columns.
X_reorg = X[row_indices, :]
X_reorg = X_reorg[:, col_indices]

# Pass the ndarray (the same data the model was fitted on) rather than the
# DataFrame `x`: positional `X[row_indices, :]` indexing as done above is
# not supported by a DataFrame.
# NOTE(review): presumably plot_reorganized_matrix indexes its input the
# same way - confirm against the coclust documentation.
plot_reorganized_matrix(X, cocluster)

# NOTE(review): the original script ended with
# `cocluster.get_assignment_matrix(a)`, but `a` is never defined in the
# visible script, so the call could only raise NameError. It is left out
# until the intended arguments are known (the coclust docs appear to
# describe the signature as `(kind, i)` - TODO confirm).