DeepST tutorial
0. import packages and select GPU if accessible
[ ]:
import os
from DeepST import run
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.metrics import adjusted_rand_score
import numpy as np
# Reading image data requires an extra package; we recommend "opencv".
# Install opencv for Python:
#!pip3 install opencv-python
from st_loading_utils import load_DLPFC, load_BC, load_mVC, load_mPFC, load_mHypothalamus, load_her2_tumor, load_mMAMP
iters = 2  # number of repeated runs per dataset; every cell below loops over range(iters)
1. DLPFC dataset
Change `dir_` in the cell below to the path of your DLPFC data, e.g. 'path/to/your/DLPFC/data'.
[ ]:
"""DLPFC"""
# DLPFC benchmark: each entry is [number of domains, dataset id].
setting_combinations = [
    [7, '151507'], [7, '151508'], [7, '151509'], [7, '151510'],
    [5, '151669'], [5, '151670'], [5, '151671'], [5, '151672'],
    [7, '151673'], [7, '151674'], [7, '151675'], [7, '151676'],
]
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    aris = []  # one ARI per repeated run of this dataset
    dir_ = './benchmarking_data/DLPFC12'
    save_path = '../results/' + dataset + '/'
    deepen = run(
        save_path=save_path,
        platform="Visium",
        pca_n_comps=200,
        pre_epochs=800,  # choose the number of pre-training epochs according to your hardware
        epochs=1000,  # choose the number of training epochs according to your hardware
        Conv_type="GCNConv",  # other GNN layer types can be selected here
    )
    adata_ = deepen._get_adata(dir_, dataset)
    for iter_ in range(iters):
        # Augment the loaded data, build the spatial graph, train, then cluster.
        adata = deepen._get_augment(adata_, adjacent_weight=0.3, neighbour_k=4)
        graph_dict = deepen._get_graph(adata.obsm["spatial"], distType="BallTree", k=12)
        adata = deepen._fit(adata, graph_dict, pretrain=False)
        # Set priori=False to cluster without using prior knowledge.
        adata = deepen._get_cluster_data(adata, n_domains=n_clusters, priori=True)
        print(adata.obs)
        # Agreement between DeepST's refined domains and the "original_clusters" labels.
        ARI = adjusted_rand_score(
            adata.obs["DeepST_refine_domain"],
            adata.obs["original_clusters"],
        )
        aris.append(ARI)
        print(iter_)
        print('Dataset:', dataset)
        print(ARI)
    # Per-dataset summary, then append one row to the shared results file.
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    with open('deepst_aris.txt', 'a+') as fp:
        fp.write('DLPFC' + dataset + ' ' + ' '.join(map(str, aris)) + '\n')
2. BC/MA datasets
[ ]:
"""BC"""
# BC benchmark: each entry is [number of clusters, dataset id].
setting_combinations = [[20, 'section1']]
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    aris = []  # one ARI per repeated run
    dir_ = './benchmarking_data/BC'
    save_path = '../results/' + dataset + '/'
    deepen = run(
        save_path=save_path,
        platform="Visium",
        pca_n_comps=200,
        pre_epochs=800,  # choose the number of pre-training epochs according to your hardware
        epochs=1000,  # choose the number of training epochs according to your hardware
        Conv_type="GCNConv",  # other GNN layer types can be selected here
    )
    adata_ = deepen._get_adata(dir_, dataset)
    for iter_ in range(iters):
        # Augment the loaded data, build the spatial graph, train, then cluster.
        adata = deepen._get_augment(adata_, adjacent_weight=0.3, neighbour_k=4)
        graph_dict = deepen._get_graph(adata.obsm["spatial"], distType="BallTree", k=12)
        adata = deepen._fit(adata, graph_dict, pretrain=False)
        # Set priori=False to cluster without using prior knowledge.
        adata = deepen._get_cluster_data(adata, n_domains=n_clusters, priori=True)
        print(adata.obs)
        # Agreement between DeepST's refined domains and the "original_clusters" labels.
        ARI = adjusted_rand_score(
            adata.obs["DeepST_refine_domain"],
            adata.obs["original_clusters"],
        )
        aris.append(ARI)
        print(iter_)
        print('Dataset:', dataset)
        print(ARI)
    # Per-dataset summary, then append one row to the shared results file.
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    with open('deepst_aris.txt', 'a+') as fp:
        fp.write('HBRC1 ' + ' '.join(map(str, aris)) + '\n')
[ ]:
"""BC"""
# NOTE(review): this cell has the same settings as the BC cell directly before it
# (same directory, dataset id, cluster count and 'HBRC1' row label). Given the
# "BC/MA" section heading it was presumably meant for the MA dataset - confirm
# the intended settings. As written it re-runs BC and appends a second 'HBRC1' row.
# Each entry is [number of clusters, dataset id].
setting_combinations = [[20, 'section1']]
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    aris = []  # one ARI per repeated run
    dir_ = './benchmarking_data/BC'
    save_path = '../results/' + dataset + '/'
    deepen = run(
        save_path=save_path,
        platform="Visium",
        pca_n_comps=200,
        pre_epochs=800,  # choose the number of pre-training epochs according to your hardware
        epochs=1000,  # choose the number of training epochs according to your hardware
        Conv_type="GCNConv",  # other GNN layer types can be selected here
    )
    adata_ = deepen._get_adata(dir_, dataset)
    for iter_ in range(iters):
        # Augment the loaded data, build the spatial graph, train, then cluster.
        adata = deepen._get_augment(adata_, adjacent_weight=0.3, neighbour_k=4)
        graph_dict = deepen._get_graph(adata.obsm["spatial"], distType="BallTree", k=12)
        adata = deepen._fit(adata, graph_dict, pretrain=False)
        # Set priori=False to cluster without using prior knowledge.
        adata = deepen._get_cluster_data(adata, n_domains=n_clusters, priori=True)
        print(adata.obs)
        # Agreement between DeepST's refined domains and the "original_clusters" labels.
        ARI = adjusted_rand_score(
            adata.obs["DeepST_refine_domain"],
            adata.obs["original_clusters"],
        )
        aris.append(ARI)
        print(iter_)
        print('Dataset:', dataset)
        print(ARI)
    # Per-dataset summary, then append one row to the shared results file.
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    with open('deepst_aris.txt', 'a+') as fp:
        fp.write('HBRC1 ' + ' '.join(map(str, aris)) + '\n')
3. mVC/mPFC datasets
[ ]:
"""mVC"""
# mVC benchmark: each entry is [number of clusters, .h5ad file name].
setting_combinations = [[7, 'STARmap_20180505_BY3_1k.h5ad']]
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    aris = []  # one ARI per repeated run
    dir_ = './benchmarking_data/STARmap_mouse_visual_cortex'
    save_path = '../results/' + dataset + '/'
    deepen = run(
        save_path=save_path,
        platform="benchmark_test",
        pca_n_comps=200,
        pre_epochs=800,  # choose the number of pre-training epochs according to your hardware
        epochs=1000,  # choose the number of training epochs according to your hardware
    )
    # weights="weights_matrix_nomd": augmentation without using morphological information.
    # The data and its graph are loaded once here and reused for every run; the
    # _get_adata / _get_augment / _get_graph path is not used in this cell.
    adata_, graph_dict = deepen._get_single_adata(dir_, dataset, weights="weights_matrix_nomd")
    for iter_ in range(iters):
        adata = deepen._fit(adata_, graph_dict, pretrain=False)
        # Set priori=False to cluster without using prior knowledge.
        adata = deepen._get_cluster_data(adata, n_domains=n_clusters, priori=True)
        print(adata.obs)
        # Agreement between DeepST's refined domains and the "original_clusters" labels.
        ARI = adjusted_rand_score(
            adata.obs["DeepST_refine_domain"],
            adata.obs["original_clusters"],
        )
        aris.append(ARI)
        print(iter_)
        print('Dataset:', dataset)
        print(ARI)
    # Per-dataset summary, then append one row to the shared results file.
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    with open('deepst_aris.txt', 'a+') as fp:
        fp.write('mVC ' + ' '.join(map(str, aris)) + '\n')
[ ]:
"""mPFC"""
# mPFC benchmark: each entry is [number of clusters, dataset id].
setting_combinations = [
    [4, '20180417_BZ5_control'],
    [4, '20180419_BZ9_control'],
    [4, '20180424_BZ14_control'],
]
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    aris = []  # one ARI per repeated run of this dataset
    dir_ = './benchmarking_data/STARmap_mouse_PFC'
    save_path = '../results/' + dataset + '/'
    deepen = run(
        save_path=save_path,
        platform="benchmark_test",
        pca_n_comps=200,
        pre_epochs=800,  # choose the number of pre-training epochs according to your hardware
        epochs=1000,  # choose the number of training epochs according to your hardware
    )
    # weights="weights_matrix_nomd": augmentation without using morphological information.
    # The data and its graph are loaded once here and reused for every run; the
    # _get_adata / _get_augment / _get_graph path is not used in this cell.
    adata_, graph_dict = deepen._get_single_adata(dir_, dataset, weights="weights_matrix_nomd")
    for iter_ in range(iters):
        adata = deepen._fit(adata_, graph_dict, pretrain=False)
        # Set priori=False to cluster without using prior knowledge.
        adata = deepen._get_cluster_data(adata, n_domains=n_clusters, priori=True)
        print(adata.obs)
        # Agreement between DeepST's refined domains and the "original_clusters" labels.
        ARI = adjusted_rand_score(
            adata.obs["DeepST_refine_domain"],
            adata.obs["original_clusters"],
        )
        aris.append(ARI)
        print(iter_)
        print('Dataset:', dataset)
        print(ARI)
    # Per-dataset summary, then append one row to the shared results file.
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    with open('deepst_aris.txt', 'a+') as fp:
        fp.write('mPFC' + dataset + ' ' + ' '.join(map(str, aris)) + '\n')
4. mHypothalamus dataset
[ ]:
"""mHypo"""
# mHypothalamus benchmark: each entry is [number of clusters, dataset id].
setting_combinations = [
    [8, '-0.04'],
    [8, '-0.09'],
    [8, '-0.14'],
    [8, '-0.19'],
    [8, '-0.24'],
]
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    aris = []  # one ARI per repeated run of this dataset
    dir_ = './benchmarking_data/mHypothalamus'
    save_path = '../results/' + dataset + '/'
    deepen = run(
        save_path=save_path,
        platform="benchmark_test",
        pca_n_comps=200,
        pre_epochs=800,  # choose the number of pre-training epochs according to your hardware
        epochs=1000,  # choose the number of training epochs according to your hardware
    )
    # weights="weights_matrix_nomd": augmentation without using morphological information.
    # The data and its graph are loaded once here and reused for every run; the
    # _get_adata / _get_augment / _get_graph path is not used in this cell.
    adata_, graph_dict = deepen._get_single_adata(dir_, dataset, weights="weights_matrix_nomd")
    for iter_ in range(iters):
        adata = deepen._fit(adata_, graph_dict, pretrain=False)
        # Set priori=False to cluster without using prior knowledge.
        adata = deepen._get_cluster_data(adata, n_domains=n_clusters, priori=True)
        print(adata.obs)
        # Agreement between DeepST's refined domains and the "original_clusters" labels.
        ARI = adjusted_rand_score(
            adata.obs["DeepST_refine_domain"],
            adata.obs["original_clusters"],
        )
        aris.append(ARI)
        print(iter_)
        print('Dataset:', dataset)
        print(ARI)
    # Per-dataset summary, then append one row to the shared results file.
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    with open('deepst_aris.txt', 'a+') as fp:
        fp.write('mHypothalamus' + dataset + ' ' + ' '.join(map(str, aris)) + '\n')
5. Her2Tumor dataset
[ ]:
"""Her2"""
# Her2 tumor benchmark: each entry is [number of clusters, dataset id].
setting_combinations = [
    [6, 'A1'], [5, 'B1'], [4, 'C1'], [4, 'D1'],
    [4, 'E1'], [4, 'F1'], [7, 'G2'], [7, 'H1'],
]
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    aris = []  # one ARI per repeated run of this dataset
    dir_ = './benchmarking_data/Her2_tumor'
    save_path = '../results/' + dataset + '/'
    deepen = run(
        save_path=save_path,
        platform="benchmark_test",
        pca_n_comps=200,
        pre_epochs=800,  # choose the number of pre-training epochs according to your hardware
        epochs=1000,  # choose the number of training epochs according to your hardware
    )
    # weights="weights_matrix_nomd": augmentation without using morphological information.
    # The data and its graph are loaded once here and reused for every run; the
    # _get_adata / _get_augment / _get_graph path is not used in this cell.
    adata_, graph_dict = deepen._get_single_adata(dir_, dataset, weights="weights_matrix_nomd")
    for iter_ in range(iters):
        adata = deepen._fit(adata_, graph_dict, pretrain=False)
        # Set priori=False to cluster without using prior knowledge.
        adata = deepen._get_cluster_data(adata, n_domains=n_clusters, priori=True)
        print(adata.obs)
        # Agreement between DeepST's refined domains and the "original_clusters" labels.
        ARI = adjusted_rand_score(
            adata.obs["DeepST_refine_domain"],
            adata.obs["original_clusters"],
        )
        aris.append(ARI)
        print(iter_)
        print('Dataset:', dataset)
        print(ARI)
    # Per-dataset summary, then append one row to the shared results file.
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    with open('deepst_aris.txt', 'a+') as fp:
        fp.write('Her2tumor' + dataset + ' ' + ' '.join(map(str, aris)) + '\n')
6. Mouse hippocampus dataset
[ ]:
"""mouse hippo"""
# Mouse hippocampus benchmark: each entry is [number of clusters, .h5ad file name].
setting_combinations = [[14, 'sshippo.h5ad']]
for setting_combi in setting_combinations:
    n_clusters = setting_combi[0]  # 14
    dataset = setting_combi[1]  # 'sshippo.h5ad'
    aris = []  # one ARI per repeated run
    dir_ = './benchmarking_data/mouse_hyppocampus_slideseqv2'
    save_path = '../results/' + dataset + '/'
    # NOTE(review): the data directory is named for Slide-seqV2, yet the run uses
    # platform="Visium" and loads through _get_adata - confirm this is the
    # intended way to read this .h5ad file.
    deepen = run(
        save_path=save_path,
        platform="Visium",
        pca_n_comps=200,
        pre_epochs=800,  # choose the number of pre-training epochs according to your hardware
        epochs=1000,  # choose the number of training epochs according to your hardware
        Conv_type="GCNConv",  # other GNN layer types can be selected here
    )
    adata_ = deepen._get_adata(dir_, dataset)
    for iter_ in range(iters):
        # Augment the loaded data, build the spatial graph, train, then cluster.
        adata = deepen._get_augment(adata_, adjacent_weight=0.3, neighbour_k=4)
        graph_dict = deepen._get_graph(adata.obsm["spatial"], distType="BallTree", k=12)
        adata = deepen._fit(adata, graph_dict, pretrain=False)
        # Set priori=False to cluster without using prior knowledge.
        adata = deepen._get_cluster_data(adata, n_domains=n_clusters, priori=True)
        print(adata.obs)
        # Agreement between DeepST's refined domains and the "original_clusters" labels.
        ARI = adjusted_rand_score(
            adata.obs["DeepST_refine_domain"],
            adata.obs["original_clusters"],
        )
        aris.append(ARI)
        print(iter_)
        print('Dataset:', dataset)
        print(ARI)
    # Per-dataset summary, then append one row to the shared results file.
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    with open('deepst_aris.txt', 'a+') as fp:
        # Row label fixed: this cell previously wrote the 'DLPFC' prefix (a
        # copy-paste leftover), which mislabelled the mouse hippocampus results.
        fp.write('mHippo' + dataset + ' ')
        fp.write(' '.join([str(i) for i in aris]))
        fp.write('\n')