DeepST integration tutorial
[1]:
import os
from DeepST import run
import matplotlib.pyplot as plt
from pathlib import Path
import scanpy as sc
import numpy as np
from sklearn import metrics
import json
import time
import os.path as osp
iters = 1
save_dir_gt = '/home/yunfei/spatial_benchmarking/BenchmarkST/sim_data_results/deepst'
/home/yunfei/anaconda3/envs/DeepST/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
from .autonotebook import tqdm as notebook_tqdm
Mouse Hypothalamus data integration (pair-wise)
[ ]:
"""mhypo"""
section_ids_list = [['-0.04', '-0.09'], ['-0.09', '-0.14'], ['-0.14', '-0.19'], ['-0.19', '-0.24']]
run_times = []
for iter_ in range(iters):
for section_ids in section_ids_list:
dataset = section_ids[0] + '_' + section_ids[1]
start_time = time.time()
# slice1 = load_mHypothalamus(section_id=section_ids[0])
# slice2 = load_mHypothalamus(section_id=section_ids[1])
# run deeepst pairwise alignment
data_path = "/home/yunfei/spatial_benchmarking/benchmarking_data/mHypothalamus"
data_name_list = section_ids
save_path = "../Results"
n_domains = 8
deepen = run(task = "Integration",
pre_epochs = 800, #### According to your own hardware, choose the number of training
epochs = 1000, #### According to your own hardware, choose the number of training
use_gpu = True
)
augement_data_list = []
graph_list = []
for i in range(len(section_ids)):
adata = deepen._get_adata(platform="benchmark_test", data_path=data_path, data_name=data_name_list[i])
# adata = deepen._get_image_crop(adata, data_name=data_name_list[i])
# adata.obs = adata.obs.rename(columns={'imagerow': 'array_row', 'imagecol': 'array_col'})
# print(adata.obs)
adata = deepen._get_augment(adata, spatial_type="KDTree", use_morphological=False)
graph_dict = deepen._get_graph(adata.obsm["spatial"], distType = "KDTree")
augement_data_list.append(adata)
graph_list.append(graph_dict)
######## Synthetic Datasets and Graphs
multiple_adata, multiple_graph = deepen._get_multiple_adata(adata_list = augement_data_list, data_name_list = data_name_list, graph_list = graph_list)
###### Enhanced data preprocessing
data = deepen._data_process(multiple_adata, pca_n_comps = 150)
deepst_embed = deepen._fit(
data = data,
graph_dict = multiple_graph,
domains = multiple_adata.obs["batch"].values, ##### Input to Domain Adversarial Model
n_domains = len(data_name_list))
multiple_adata.obsm["DeepST_embed"] = deepst_embed
multiple_adata = deepen._get_cluster_data(multiple_adata, n_domains=n_domains, priori = True)
# np.save(os.path.join(save_dir_r, 'deepst_' + sec1 + '_' + sec2 + '_iter_' + str(e) + '_deepst_embedding.npy'), adata.obsm['DeepST_embed'])
# save alignment matrix
if not os.path.exists(os.path.join(save_dir_gt, dataset)):
os.makedirs(os.path.join(save_dir_gt, dataset))
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'embedding.npy'), multiple_adata.obsm['DeepST_embed'])
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'prediction.npy'), multiple_adata.obs['DeepST_refine_domain'])
end_time = time.time()
run_times.append(end_time - start_time)
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'labels.npy'), multiple_adata.obs['original_clusters'])
multiple_adata = deepen._get_cluster_data(multiple_adata, n_domains=n_domains, priori = True)
for data_name in data_name_list:
adata = multiple_adata[multiple_adata.obs["batch_name"]==data_name]
np.save(osp.join(save_dir_gt, dataset, 'iter'+data_name+'_'+str(iter_)+'preds.npy'), adata.obs['DeepST_refine_domain'])
DLPFC data integration (pair-wise)
[ ]:
"""dlpfc"""
section_ids_list = [['151507', '151508'], ['151508', '151509'], ['151509', '151510'], ['151673', '151674'], ['151674', '151675'], ['151675', '151676']]
run_times = []
for section_ids in section_ids_list:
for iter_ in range(iters):
dataset = section_ids[0] + '_' + section_ids[1]
start_time = time.time()
# slice1 = load_mHypothalamus(section_id=section_ids[0])
# slice2 = load_mHypothalamus(section_id=section_ids[1])
# run deeepst pairwise alignment
data_path = "/home/yunfei/spatial_benchmarking/benchmarking_data/DLPFC12"
data_name_list = section_ids
save_path = "../Results"
n_domains = 7
deepen = run(task = "Integration",
pre_epochs = 800, #### According to your own hardware, choose the number of training
epochs = 1000, #### According to your own hardware, choose the number of training
use_gpu = True
)
augement_data_list = []
graph_list = []
for i in range(len(section_ids)):
adata = deepen._get_adata(platform="Visium", data_path=data_path, data_name=data_name_list[i])
# adata = deepen._get_image_crop(adata, data_name=data_name_list[i])
# adata.obs = adata.obs.rename(columns={'imagerow': 'array_row', 'imagecol': 'array_col'})
# print(adata.obs)
adata = deepen._get_augment(adata, spatial_type="KDTree", use_morphological=False)
graph_dict = deepen._get_graph(adata.obsm["spatial"], distType = "KDTree")
augement_data_list.append(adata)
graph_list.append(graph_dict)
######## Synthetic Datasets and Graphs
multiple_adata, multiple_graph = deepen._get_multiple_adata(adata_list = augement_data_list, data_name_list = data_name_list, graph_list = graph_list)
###### Enhanced data preprocessing
data = deepen._data_process(multiple_adata, pca_n_comps = 200)
deepst_embed = deepen._fit(
data = data,
graph_dict = multiple_graph,
domains = multiple_adata.obs["batch"].values, ##### Input to Domain Adversarial Model
n_domains = len(data_name_list))
multiple_adata.obsm["DeepST_embed"] = deepst_embed
multiple_adata = deepen._get_cluster_data(multiple_adata, n_domains=n_domains, priori = True)
# np.save(os.path.join(save_dir_r, 'deepst_' + sec1 + '_' + sec2 + '_iter_' + str(e) + '_deepst_embedding.npy'), adata.obsm['DeepST_embed'])
# save alignment matrix
if not os.path.exists(os.path.join(save_dir_gt, dataset)):
os.makedirs(os.path.join(save_dir_gt, dataset))
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'embedding.npy'), multiple_adata.obsm['DeepST_embed'])
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'prediction.npy'), multiple_adata.obs['DeepST_refine_domain'])
end_time = time.time()
run_times.append(end_time - start_time)
# save labels
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'labels.npy'), multiple_adata.obs['original_clusters'])
multiple_adata = deepen._get_cluster_data(multiple_adata, n_domains=n_domains, priori = True)
for data_name in data_name_list:
adata = multiple_adata[multiple_adata.obs["batch_name"]==data_name]
np.save(osp.join(save_dir_gt, dataset, 'iter'+data_name+'_'+str(iter_)+'preds.npy'), adata.obs['DeepST_refine_domain'])
section_ids_list = [['151669', '151670'], ['151670', '151671'], ['151671', '151672']]
run_times = []
for section_ids in section_ids_list:
for iter_ in range(iters):
dataset = section_ids[0] + '_' + section_ids[1]
start_time = time.time()
# slice1 = load_mHypothalamus(section_id=section_ids[0])
# slice2 = load_mHypothalamus(section_id=section_ids[1])
# run deeepst pairwise alignment
data_path = "/home/yunfei/spatial_benchmarking/benchmarking_data/DLPFC12"
data_name_list = section_ids
save_path = "../Results"
n_domains = 5
deepen = run(task = "Integration",
pre_epochs = 800, #### According to your own hardware, choose the number of training
epochs = 1000, #### According to your own hardware, choose the number of training
use_gpu = True
)
augement_data_list = []
graph_list = []
for i in range(len(section_ids)):
adata = deepen._get_adata(platform="Visium", data_path=data_path, data_name=data_name_list[i])
# adata = deepen._get_image_crop(adata, data_name=data_name_list[i])
# adata.obs = adata.obs.rename(columns={'imagerow': 'array_row', 'imagecol': 'array_col'})
# print(adata.obs)
adata = deepen._get_augment(adata, spatial_type="KDTree", use_morphological=False)
graph_dict = deepen._get_graph(adata.obsm["spatial"], distType = "KDTree")
augement_data_list.append(adata)
graph_list.append(graph_dict)
######## Synthetic Datasets and Graphs
multiple_adata, multiple_graph = deepen._get_multiple_adata(adata_list = augement_data_list, data_name_list = data_name_list, graph_list = graph_list)
###### Enhanced data preprocessing
data = deepen._data_process(multiple_adata, pca_n_comps = 200)
deepst_embed = deepen._fit(
data = data,
graph_dict = multiple_graph,
domains = multiple_adata.obs["batch"].values, ##### Input to Domain Adversarial Model
n_domains = len(data_name_list))
multiple_adata.obsm["DeepST_embed"] = deepst_embed
multiple_adata = deepen._get_cluster_data(multiple_adata, n_domains=n_domains, priori = True)
# np.save(os.path.join(save_dir_r, 'deepst_' + sec1 + '_' + sec2 + '_iter_' + str(e) + '_deepst_embedding.npy'), adata.obsm['DeepST_embed'])
# save alignment matrix
if not os.path.exists(os.path.join(save_dir_gt, dataset)):
os.makedirs(os.path.join(save_dir_gt, dataset))
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'embedding.npy'), multiple_adata.obsm['DeepST_embed'])
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'prediction.npy'), multiple_adata.obs['DeepST_refine_domain'])
end_time = time.time()
run_times.append(end_time - start_time)
# save labels
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'labels.npy'), multiple_adata.obs['original_clusters'])
multiple_adata = deepen._get_cluster_data(multiple_adata, n_domains=n_domains, priori = True)
for data_name in data_name_list:
adata = multiple_adata[multiple_adata.obs["batch_name"]==data_name]
np.save(osp.join(save_dir_gt, dataset, 'iter'+data_name+'_'+str(iter_)+'preds.npy'), adata.obs['DeepST_refine_domain'])
Mouse Hypothalamus data integration (multi-slices)
[ ]:
"""mhypo"""
section_ids_list = [['-0.04', '-0.09', '-0.14', '-0.19', '-0.24']]
run_times = []
for iter_ in range(iters):
for section_ids in section_ids_list:
dataset = section_ids[0] + '_' + section_ids[1] + '_' + section_ids[2] + '_' + section_ids[3] + '_' + section_ids[4]
start_time = time.time()
# run deeepst pairwise alignment
data_path = "/home/yunfei/spatial_benchmarking/benchmarking_data/mHypothalamus"
data_name_list = section_ids
save_path = "../Results"
n_domains = 8
deepen = run(task = "Integration",
pre_epochs = 800, #### According to your own hardware, choose the number of training
epochs = 1000, #### According to your own hardware, choose the number of training
use_gpu = True
)
augement_data_list = []
graph_list = []
for i in range(len(section_ids)):
adata = deepen._get_adata(platform="benchmark_test", data_path=data_path, data_name=data_name_list[i])
# adata = deepen._get_image_crop(adata, data_name=data_name_list[i])
# adata.obs = adata.obs.rename(columns={'imagerow': 'array_row', 'imagecol': 'array_col'})
# print(adata.obs)
adata = deepen._get_augment(adata, spatial_type="KDTree", use_morphological=False)
graph_dict = deepen._get_graph(adata.obsm["spatial"], distType = "KDTree")
augement_data_list.append(adata)
graph_list.append(graph_dict)
######## Synthetic Datasets and Graphs
multiple_adata, multiple_graph = deepen._get_multiple_adata(adata_list = augement_data_list, data_name_list = data_name_list, graph_list = graph_list)
###### Enhanced data preprocessing
data = deepen._data_process(multiple_adata, pca_n_comps = 150)
deepst_embed = deepen._fit(
data = data,
graph_dict = multiple_graph,
domains = multiple_adata.obs["batch"].values, ##### Input to Domain Adversarial Model
n_domains = len(data_name_list))
multiple_adata.obsm["DeepST_embed"] = deepst_embed
multiple_adata = deepen._get_cluster_data(multiple_adata, n_domains=n_domains, priori = True)
# np.save(os.path.join(save_dir_r, 'deepst_' + sec1 + '_' + sec2 + '_iter_' + str(e) + '_deepst_embedding.npy'), adata.obsm['DeepST_embed'])
# save alignment matrix
if not os.path.exists(os.path.join(save_dir_gt, dataset)):
os.makedirs(os.path.join(save_dir_gt, dataset))
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'embedding.npy'), multiple_adata.obsm['DeepST_embed'])
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'prediction.npy'), multiple_adata.obs['DeepST_refine_domain'])
end_time = time.time()
run_times.append(end_time - start_time)
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'labels.npy'), multiple_adata.obs['original_clusters'])
multiple_adata = deepen._get_cluster_data(multiple_adata, n_domains=n_domains, priori = True)
for data_name in data_name_list:
adata = multiple_adata[multiple_adata.obs["batch_name"]==data_name]
np.save(osp.join(save_dir_gt, dataset, 'iter'+data_name+'_'+str(iter_)+'preds.npy'), adata.obs['DeepST_refine_domain'])
DLPFC data integration (multi-slices)
[ ]:
"""dlpfc"""
section_ids_list = [['151507', '151508', '151509', '151510'], ['151673', '151674', '151675', '151676']]
run_times = []
for section_ids in section_ids_list:
for iter_ in range(iters):
dataset = section_ids[0] + '_' + section_ids[1] + '_' + section_ids[2] + '_' + section_ids[3]
start_time = time.time()
# slice1 = load_mHypothalamus(section_id=section_ids[0])
# slice2 = load_mHypothalamus(section_id=section_ids[1])
# run deeepst pairwise alignment
data_path = "/home/yunfei/spatial_benchmarking/benchmarking_data/DLPFC12"
data_name_list = section_ids
save_path = "../Results"
n_domains = 7
deepen = run(task = "Integration",
pre_epochs = 800, #### According to your own hardware, choose the number of training
epochs = 1000, #### According to your own hardware, choose the number of training
use_gpu = True
)
augement_data_list = []
graph_list = []
for i in range(len(section_ids)):
adata = deepen._get_adata(platform="Visium", data_path=data_path, data_name=data_name_list[i])
# adata = deepen._get_image_crop(adata, data_name=data_name_list[i])
# adata.obs = adata.obs.rename(columns={'imagerow': 'array_row', 'imagecol': 'array_col'})
# print(adata.obs)
adata = deepen._get_augment(adata, spatial_type="KDTree", use_morphological=False)
graph_dict = deepen._get_graph(adata.obsm["spatial"], distType = "KDTree")
augement_data_list.append(adata)
graph_list.append(graph_dict)
######## Synthetic Datasets and Graphs
multiple_adata, multiple_graph = deepen._get_multiple_adata(adata_list = augement_data_list, data_name_list = data_name_list, graph_list = graph_list)
###### Enhanced data preprocessing
data = deepen._data_process(multiple_adata, pca_n_comps = 200)
deepst_embed = deepen._fit(
data = data,
graph_dict = multiple_graph,
domains = multiple_adata.obs["batch"].values, ##### Input to Domain Adversarial Model
n_domains = len(data_name_list))
multiple_adata.obsm["DeepST_embed"] = deepst_embed
multiple_adata = deepen._get_cluster_data(multiple_adata, n_domains=n_domains, priori = True)
# np.save(os.path.join(save_dir_r, 'deepst_' + sec1 + '_' + sec2 + '_iter_' + str(e) + '_deepst_embedding.npy'), adata.obsm['DeepST_embed'])
# save alignment matrix
if not os.path.exists(os.path.join(save_dir_gt, dataset)):
os.makedirs(os.path.join(save_dir_gt, dataset))
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'embedding.npy'), multiple_adata.obsm['DeepST_embed'])
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'prediction.npy'), multiple_adata.obs['DeepST_refine_domain'])
end_time = time.time()
run_times.append(end_time - start_time)
# save labels
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'labels.npy'), multiple_adata.obs['original_clusters'])
multiple_adata = deepen._get_cluster_data(multiple_adata, n_domains=n_domains, priori = True)
for data_name in data_name_list:
adata = multiple_adata[multiple_adata.obs["batch_name"]==data_name]
np.save(osp.join(save_dir_gt, dataset, 'iter'+data_name+'_'+str(iter_)+'preds.npy'), adata.obs['DeepST_refine_domain'])
section_ids_list = [['151669', '151670', '151671', '151672']]
run_times = []
for section_ids in section_ids_list:
for iter_ in range(iters):
dataset = section_ids[0] + '_' + section_ids[1] + '_' + section_ids[2] + '_' + section_ids[3]
start_time = time.time()
# slice1 = load_mHypothalamus(section_id=section_ids[0])
# slice2 = load_mHypothalamus(section_id=section_ids[1])
# run deeepst pairwise alignment
data_path = "/home/yunfei/spatial_benchmarking/benchmarking_data/DLPFC12"
data_name_list = section_ids
save_path = "../Results"
n_domains = 5
deepen = run(task = "Integration",
pre_epochs = 800, #### According to your own hardware, choose the number of training
epochs = 1000, #### According to your own hardware, choose the number of training
use_gpu = True
)
augement_data_list = []
graph_list = []
for i in range(len(section_ids)):
adata = deepen._get_adata(platform="Visium", data_path=data_path, data_name=data_name_list[i])
# adata = deepen._get_image_crop(adata, data_name=data_name_list[i])
# adata.obs = adata.obs.rename(columns={'imagerow': 'array_row', 'imagecol': 'array_col'})
# print(adata.obs)
adata = deepen._get_augment(adata, spatial_type="KDTree", use_morphological=False)
graph_dict = deepen._get_graph(adata.obsm["spatial"], distType = "KDTree")
augement_data_list.append(adata)
graph_list.append(graph_dict)
######## Synthetic Datasets and Graphs
multiple_adata, multiple_graph = deepen._get_multiple_adata(adata_list = augement_data_list, data_name_list = data_name_list, graph_list = graph_list)
###### Enhanced data preprocessing
data = deepen._data_process(multiple_adata, pca_n_comps = 200)
deepst_embed = deepen._fit(
data = data,
graph_dict = multiple_graph,
domains = multiple_adata.obs["batch"].values, ##### Input to Domain Adversarial Model
n_domains = len(data_name_list))
multiple_adata.obsm["DeepST_embed"] = deepst_embed
multiple_adata = deepen._get_cluster_data(multiple_adata, n_domains=n_domains, priori = True)
# np.save(os.path.join(save_dir_r, 'deepst_' + sec1 + '_' + sec2 + '_iter_' + str(e) + '_deepst_embedding.npy'), adata.obsm['DeepST_embed'])
# save alignment matrix
if not os.path.exists(os.path.join(save_dir_gt, dataset)):
os.makedirs(os.path.join(save_dir_gt, dataset))
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'embedding.npy'), multiple_adata.obsm['DeepST_embed'])
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'prediction.npy'), multiple_adata.obs['DeepST_refine_domain'])
end_time = time.time()
run_times.append(end_time - start_time)
# save labels
np.save(osp.join(save_dir_gt, dataset, 'iter'+str(iter_)+'labels.npy'), multiple_adata.obs['original_clusters'])
multiple_adata = deepen._get_cluster_data(multiple_adata, n_domains=n_domains, priori = True)
for data_name in data_name_list:
adata = multiple_adata[multiple_adata.obs["batch_name"]==data_name]
np.save(osp.join(save_dir_gt, dataset, 'iter'+data_name+'_'+str(iter_)+'preds.npy'), adata.obs['DeepST_refine_domain'])