Skip to content

Commit e53381f

Browse files
committed
New figure file and modified import_ccle, comments fixed.
1 parent d72c1e4 commit e53381f

File tree

2 files changed

+83
-0
lines changed

2 files changed

+83
-0
lines changed

pf2barcode/figures/figure4.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""
2+
Figure 4 -- Recreation of the GEMLI paper's boxplot of the correlation distance for related cells
3+
(blue) and for randomly sampled cells
4+
"""
5+
6+
import pandas as pd
7+
import numpy as np
8+
import seaborn as sns
9+
from scipy.spatial.distance import pdist
10+
from pf2barcode.imports import import_CCLE
11+
from .common import getSetup, subplotLabel
12+
from sklearn.preprocessing import scale
13+
14+
15+
def _sample_distinct_pairs(rng, n_cells, n_pairs):
    """Draw ``n_pairs`` index pairs ``(i, j)`` from ``range(n_cells)`` with ``i != j``."""
    first = rng.integers(0, n_cells, size=n_pairs)
    # Draw the partner from one fewer slot and shift it past ``first``: the
    # partner is then uniform over the *other* cells and never the cell itself
    # (a self-pair would contribute a spurious distance of exactly 0).
    second = rng.integers(0, n_cells - 1, size=n_pairs)
    second += second >= first
    return first, second


def _paired_correlation_distance(mat, first, second, chunk_size=256):
    """Correlation distance between rows ``mat[first[k]]`` and ``mat[second[k]]``.

    Computes ``1 - Pearson r`` for every pair, in chunks of ``chunk_size``
    pairs so that only two ``chunk_size x n_features`` temporaries are held at
    a time. Pairs involving a constant row yield NaN.
    """
    dist = np.empty(len(first), dtype=float)
    for start in range(0, len(first), chunk_size):
        rows = slice(start, start + chunk_size)
        # Fancy indexing copies, so the in-place centering never touches ``mat``.
        a = np.asarray(mat[first[rows]], dtype=float)
        b = np.asarray(mat[second[rows]], dtype=float)
        a -= a.mean(axis=1, keepdims=True)
        b -= b.mean(axis=1, keepdims=True)
        cross = np.einsum("ij,ij->i", a, b)
        norm = np.sqrt(np.einsum("ij,ij->i", a, a) * np.einsum("ij,ij->i", b, b))
        with np.errstate(divide="ignore", invalid="ignore"):
            corr = cross / norm
        # Clip rounding overshoot so distances stay within [0, 2].
        dist[rows] = 1.0 - np.clip(corr, -1.0, 1.0)
    return dist


def makeFigure(n_resamples=100, seed=0):
    """Boxplot of correlation distance for related and random cells per lineage.

    For every lineage barcode (``obs["SW"]``) that is not ``"unknown"`` and is
    carried by more than 10 cells, two sets of correlation distances are
    plotted side by side:

    * "Cell lineage": all pairwise distances between cells sharing the barcode.
    * "Random cells": distances between randomly drawn pairs of distinct cells
      from the whole filtered dataset, ``n_resamples`` times as many pairs as
      the lineage has.

    Args:
        n_resamples: Number of random pairs drawn per within-lineage pair.
        seed: Seed for the random baseline; the default makes the figure
            reproducible. Pass ``None`` for a fresh draw on every call.

    Returns:
        The figure object created by ``getSetup``.

    Raises:
        ValueError: If no lineage passes the filtering step.
    """
    ax, f = getSetup((8, 4), (1, 1))
    subplotLabel(ax)

    X = import_CCLE(pca_option="none")

    # Filter out unknown or rare barcodes
    X = X[X.obs["SW"] != "unknown"]
    sw_counts = X.obs["SW"].value_counts()
    good_SW = sw_counts.index[sw_counts > 10]
    X = X[X.obs["SW"].isin(good_SW)]

    # Convert matrix to dense for correlation computation; tolerate an
    # expression matrix that is already dense.
    mat = X.X.toarray() if hasattr(X.X, "toarray") else np.asarray(X.X)
    df = X.obs.copy()
    df["index"] = np.arange(len(df))

    n_cells = mat.shape[0]
    rng = np.random.default_rng(seed)
    results = []

    for sw, cells in df.groupby("SW"):
        idx = cells["index"].values
        # Still needed after filtering: grouping may yield empty groups
        # (e.g. for categories that no longer have any cells).
        if len(idx) < 2:
            continue

        # Related (within-lineage) distances
        related = pdist(mat[idx], metric="correlation")

        # Random baseline: n_resamples pairs of distinct cells per related pair
        first, second = _sample_distinct_pairs(
            rng, n_cells, n_resamples * len(related)
        )
        random_dist = _paired_correlation_distance(mat, first, second)

        results.append(
            pd.DataFrame(
                {
                    "Correlation distance": np.concatenate([related, random_dist]),
                    "Group": ["Cell lineage"] * len(related)
                    + ["Random cells"] * len(random_dist),
                    "Lineage": [sw] * (len(related) + len(random_dist)),
                }
            )
        )

    if not results:
        raise ValueError(
            "No lineage barcode with more than 10 cells; nothing to plot."
        )

    df_plot = pd.concat(results, ignore_index=True)

    sns.boxplot(
        data=df_plot,
        x="Lineage",
        y="Correlation distance",
        hue="Group",
        showfliers=False,
        palette={"Cell lineage": "#377eb8", "Random cells": "#bbbbbb"},
        ax=ax[0],
    )

    ax[0].set_title("Correlation distance for related and random cells per lineage")
    ax[0].set_xlabel("Lineage barcode (SW)")
    ax[0].set_ylabel("Correlation distance")
    ax[0].legend(title=None)

    return f

pf2barcode/imports.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ def import_CCLE(pca_option="dev_pca", n_comp=10) -> AnnData:
135135
X = prepare_dataset_dev(X)
136136
X.X = scale(X.X)
137137
sc.pp.pca(X, n_comps=n_comp, svd_solver="arpack")
138+
if pca_option == "none":
139+
X = X[X.X.sum(axis=1) >= 10000]
140+
sc.pp.normalize_total(X, target_sum=40000)
138141
else:
139142
X = prepare_dataset(X, geneThreshold=0.001)
140143
X.X = scale(X.X)

0 commit comments

Comments
 (0)