Skip to content

Commit a0df340

Browse files
aarmeyAaron Meyer
andauthored
Full analysis (#23)
* Save analysis * More cleanup * Fixed * Linting --------- Co-authored-by: Aaron Meyer <git@asmlab.org>
1 parent 5f8c279 commit a0df340

File tree

7 files changed

+93
-210
lines changed

7 files changed

+93
-210
lines changed

pf2barcode/figures/figure4.py

Lines changed: 0 additions & 55 deletions
This file was deleted.

pf2barcode/figures/figure6.py

Lines changed: 0 additions & 30 deletions
This file was deleted.

pf2barcode/imports.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,20 @@
11
from pathlib import Path
22

3-
import anndata
4-
import hdf5plugin # noqa: F401
53
import numpy as np
64
import pandas as pd
75
import scanpy as sc
6+
from anndata import AnnData, concat
7+
from anndata.io import read_text
88
from scipy.sparse import csr_array, csr_matrix
99
from scipy.special import xlogy
10+
from sklearn.preprocessing import scale
1011
from sklearn.utils.sparsefuncs import (
1112
inplace_column_scale,
1213
mean_variance_axis,
1314
)
1415

1516

16-
def prepare_dataset(X: anndata.AnnData, geneThreshold: float) -> anndata.AnnData:
17+
def prepare_dataset(X: AnnData, geneThreshold: float) -> AnnData:
1718
assert isinstance(X.X, csr_matrix)
1819
assert np.amin(X.X.data) >= 0.0
1920

@@ -37,7 +38,7 @@ def prepare_dataset(X: anndata.AnnData, geneThreshold: float) -> anndata.AnnData
3738
return X
3839

3940

40-
def prepare_dataset_dev(X: anndata.AnnData) -> anndata.AnnData:
41+
def prepare_dataset_dev(X: AnnData) -> AnnData:
4142
X.X = csr_array(X.X) # type: ignore
4243
assert np.amin(X.X.data) >= 0.0
4344

@@ -88,7 +89,7 @@ def prepare_dataset_dev(X: anndata.AnnData) -> anndata.AnnData:
8889
return X
8990

9091

91-
def import_CCLE(pca_option="dev_pca") -> anndata.AnnData:
92+
def import_CCLE(pca_option="dev_pca", n_comp=10) -> AnnData:
9293
# pca_option: "dev_pca" preprocesses with prepare_dataset_dev; any other value (e.g. "pca") uses prepare_dataset
9394
"""Imports barcoded cell data."""
9495
adatas = {}
@@ -103,7 +104,7 @@ def import_CCLE(pca_option="dev_pca") -> anndata.AnnData:
103104
# "T1_MDAMB231",
104105
"T2_MDAMB231",
105106
):
106-
data = anndata.read_text(current_dir / "data" / f"{name}_count_mtx.tsv.bz2").T
107+
data = read_text(current_dir / "data" / f"{name}_count_mtx.tsv.bz2").T
107108
barcodes = pd.read_csv(
108109
current_dir / "data" / f"{name}_SW.txt", sep="\t", index_col=0, header=0
109110
)
@@ -115,7 +116,7 @@ def import_CCLE(pca_option="dev_pca") -> anndata.AnnData:
115116
barcode_dfs.append(barcodes)
116117
adatas[name] = data
117118

118-
X = anndata.concat(adatas, label="sample", index_unique="-")
119+
X = concat(adatas, label="sample", index_unique="-")
119120
X.X = csr_matrix(X.X)
120121

121122
counts = X.obs["SW"].value_counts()
@@ -132,9 +133,11 @@ def import_CCLE(pca_option="dev_pca") -> anndata.AnnData:
132133
# conditional statement for either dev_pca or pca
133134
if pca_option == "dev_pca":
134135
X = prepare_dataset_dev(X)
135-
sc.pp.pca(X, n_comps=20, svd_solver="arpack")
136+
X.X = scale(X.X)
137+
sc.pp.pca(X, n_comps=n_comp, svd_solver="arpack")
136138
else:
137139
X = prepare_dataset(X, geneThreshold=0.001)
138-
sc.pp.pca(X, n_comps=20, svd_solver="arpack")
140+
X.X = scale(X.X)
141+
sc.pp.pca(X, n_comps=n_comp, svd_solver="arpack")
139142

140143
return X

pyproject.toml

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,15 @@ authors = [
88
license = { text = "MIT" }
99
requires-python = ">=3.12,<3.13"
1010
dependencies = [
11-
"numpy>=2.0",
12-
"scipy>=1.14",
13-
"scikit-learn>=1.5",
11+
"numpy>=2.2",
12+
"scipy==1.15.3",
13+
"scikit-learn>=1.7",
1414
"seaborn>=0.13",
15-
"pandas>=2.2",
15+
"pandas>=2.3",
1616
"gseapy>=1.1",
17-
"scanpy>=1.10",
18-
"dask[dataframe]>=2024.3.1",
19-
"ipykernel>=6.29.4",
20-
"setuptools>=74.0.0",
21-
"ipywidgets>=8.1.3",
17+
"scanpy>=1.11.2",
18+
"ipykernel>=6.29",
2219
"anndata>=0.11.4",
23-
"hdf5plugin>=5.1.0",
2420
]
2521

2622
[tool.rye]

quarto/analysis.qmd

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
---
2+
title: "Analysis"
3+
format: html
4+
jupyter: python3
5+
---
6+
7+
Generates a bar plot visualizing the relationship of PCs and computed
8+
negative log10 p-values from the ANOVA test (or, alternatively, the Kruskal-Wallis H-test)
9+
10+
The computed p-values indicate whether the distributions of PCs differ statistically
11+
significantly across different groups, and the negative log10 transformation
12+
of the p-values allows for easier identification and interpretation of significant PCs.
13+
14+
```{python}
15+
import scanpy as sc
16+
from pf2barcode.imports import import_CCLE
17+
from pf2barcode.analysis import anova_pvalues, kruskal_pvalues
18+
19+
import matplotlib.pyplot as plt
20+
import numpy as np
21+
import seaborn as sns
22+
23+
adata = import_CCLE("dev_pca")
24+
25+
# Compute p-values using anova_pvalues
26+
pvalues = anova_pvalues(adata)
27+
28+
# Alternative: compute p-values using kruskal_pvalues instead
29+
# pvalues = kruskal_pvalues(adata)
30+
```
31+
32+
```{python}
33+
# Barplot setup
34+
sns.barplot(x=np.arange(pvalues.shape[0]), y=-np.log10(pvalues))
35+
plt.xlabel("PC")
36+
plt.ylabel("-log10(p-value)")
37+
```
38+
39+
```{python}
40+
sc.pl.pca_variance_ratio(adata)
41+
```
42+
43+
44+
```{python}
45+
sc.pl.pca(adata, color="SW", components="2,3")
46+
```
47+
48+
49+
```{python}
50+
sc.pl.pca_loadings(adata, components="1,2")
51+
```

0 commit comments

Comments
 (0)