GraphST tutorial
0. Import packages and select a GPU if one is available
[7]:
import os
import torch
import pandas as pd
import scanpy as sc
from sklearn import metrics
import multiprocessing as mp
import numpy as np
from GraphST import GraphST
from GraphST.utils import clustering
from st_loading_utils import load_DLPFC, load_BC, load_mVC, load_mPFC, load_mHypothalamus, load_her2_tumor, load_mMAMP
# Run device: the package runs on 'cpu' by default, but a GPU is recommended.
# GPU 3 is preferred; fall back to GPU 0 when this machine exposes fewer
# than four CUDA devices, so the chosen device always exists.
preferred_gpu = 3
if torch.cuda.is_available():
    gpu_index = preferred_gpu if preferred_gpu < torch.cuda.device_count() else 0
    device = torch.device('cuda', gpu_index)
else:
    device = torch.device('cpu')
1. DLPFC dataset
Change the value of `dir_` in the cell below to the location of your DLPFC data, e.g. ‘path/to/your/DLPFC/data’.
[ ]:
"""DLPFC"""
# Benchmark GraphST on the DLPFC sections. Each entry is
# [number of clusters, section id].
setting_combinations = [[7, '151507'], [7, '151508'], [7, '151509'], [7, '151510'], [5, '151669'], [5, '151670'], [5, '151671'], [5, '151672'], [7, '151673'], [7, '151674'], [7, '151675'], [7, '151676']]
# set radius to specify the number of neighbors considered during refinement
radius = 50
tool = 'mclust'  # mclust, leiden, and louvain
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    dir_ = './benchmarking_data/DLPFC12'
    # Keep the loaded section untouched so every repetition starts from the
    # same input instead of the trained/filtered output of the previous run.
    raw_ad = load_DLPFC(root_dir=dir_, section_id=dataset)
    aris = []
    # NOTE(review): if GraphST fixes its own random seed by default, these
    # repetitions may produce near-identical ARIs -- confirm against the
    # GraphST constructor.
    for run_idx in range(20):
        # define and train the model on a private copy of the loaded data
        model = GraphST.GraphST(raw_ad.copy(), device=device)
        ad = model.train()
        # clustering
        if tool == 'mclust':
            clustering(ad, n_clusters, radius=radius, method=tool, refinement=True)  # For DLPFC dataset, we use optional refinement step.
        elif tool in ['leiden', 'louvain']:
            clustering(ad, n_clusters, radius=radius, method=tool, start=0.1, end=2.0, increment=0.01, refinement=False)
        # filter out NA nodes; copy so the .uns write below targets a real
        # AnnData object rather than a view
        ad = ad[~pd.isnull(ad.obs['original_clusters'])].copy()
        # calculate metric ARI
        ARI = metrics.adjusted_rand_score(ad.obs['domain'], ad.obs['original_clusters'])
        ad.uns['ARI'] = ARI
        print('Dataset:', dataset)
        print('ARI:', ARI)
        aris.append(ARI)
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    # append one line per section: label followed by the space-separated ARIs
    with open('graphst_aris.txt', 'a+') as fp:
        fp.write('DLPFC' + dataset + ' ')
        fp.write(' '.join([str(i) for i in aris]))
        fp.write('\n')
2. BC/MA datasets
[ ]:
"""BC"""
# Benchmark GraphST on the BC section. Each entry is
# [number of clusters, section id].
setting_combinations = [[20, 'section1']]
# set radius to specify the number of neighbors considered during refinement
radius = 50
tool = 'mclust'  # mclust, leiden, and louvain
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    dir_ = './benchmarking_data/BC'
    # Keep the loaded section untouched so every repetition starts from the
    # same input instead of the trained/filtered output of the previous run.
    raw_ad = load_BC(root_dir=dir_, section_id=dataset)
    aris = []
    # NOTE(review): if GraphST fixes its own random seed by default, these
    # repetitions may produce near-identical ARIs -- confirm against the
    # GraphST constructor.
    for run_idx in range(5):
        # define and train the model on a private copy of the loaded data
        model = GraphST.GraphST(raw_ad.copy(), device=device)
        ad = model.train()
        # clustering
        if tool == 'mclust':
            clustering(ad, n_clusters, radius=radius, method=tool, refinement=True)  # use the optional refinement step
        elif tool in ['leiden', 'louvain']:
            clustering(ad, n_clusters, radius=radius, method=tool, start=0.1, end=2.0, increment=0.01, refinement=False)
        # filter out NA nodes; copy so the .uns write below targets a real
        # AnnData object rather than a view
        ad = ad[~pd.isnull(ad.obs['original_clusters'])].copy()
        # calculate metric ARI
        ARI = metrics.adjusted_rand_score(ad.obs['domain'], ad.obs['original_clusters'])
        ad.uns['ARI'] = ARI
        aris.append(ARI)
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    # append one line: label followed by the space-separated ARIs
    with open('graphst_aris.txt', 'a+') as fp:
        fp.write('HBRC1 ')
        fp.write(' '.join([str(i) for i in aris]))
        fp.write('\n')
[ ]:
"""load mouse anterior brain section"""
# Benchmark GraphST on the mouse anterior brain section. Each entry is
# [number of clusters, section id].
setting_combinations = [[52, 'MA']]
# set radius to specify the number of neighbors considered during refinement
radius = 50
tool = 'mclust'  # mclust, leiden, and louvain
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    dir_ = './benchmarking_data/mMAMP'
    # Keep the loaded section untouched so every repetition starts from the
    # same input instead of the trained/filtered output of the previous run.
    raw_ad = load_mMAMP(root_dir=dir_, section_id=dataset)
    aris = []
    # NOTE(review): if GraphST fixes its own random seed by default, these
    # repetitions may produce near-identical ARIs -- confirm against the
    # GraphST constructor.
    for run_idx in range(5):
        # define and train the model on a private copy of the loaded data
        model = GraphST.GraphST(raw_ad.copy(), device=device)
        ad = model.train()
        # clustering
        if tool == 'mclust':
            clustering(ad, n_clusters, radius=radius, method=tool, refinement=True)  # use the optional refinement step
        elif tool in ['leiden', 'louvain']:
            clustering(ad, n_clusters, radius=radius, method=tool, start=0.1, end=2.0, increment=0.01, refinement=False)
        # filter out NA nodes; copy so the .uns write below targets a real
        # AnnData object rather than a view
        ad = ad[~pd.isnull(ad.obs['original_clusters'])].copy()
        # calculate metric ARI
        ARI = metrics.adjusted_rand_score(ad.obs['domain'], ad.obs['original_clusters'])
        ad.uns['ARI'] = ARI
        aris.append(ARI)
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    # append one line: label followed by the space-separated ARIs
    with open('graphst_aris.txt', 'a+') as fp:
        fp.write('mAB' + dataset + ' ')
        fp.write(' '.join([str(i) for i in aris]))
        fp.write('\n')
3. mVC/mPFC datasets
[ ]:
"""mVC"""
# Benchmark GraphST on the STARmap mouse visual cortex data. Each entry is
# [number of clusters, file name].
setting_combinations = [[7, 'STARmap_20180505_BY3_1k.h5ad']]
# set radius to specify the number of neighbors considered during refinement
radius = 50
tool = 'mclust'  # mclust, leiden, and louvain
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    dir_ = './benchmarking_data/STARmap_mouse_visual_cortex'
    # Keep the loaded data untouched so every repetition starts from the
    # same input instead of the trained/filtered output of the previous run.
    raw_ad = load_mVC(root_dir=dir_, section_id=dataset)
    aris = []
    # NOTE(review): if GraphST fixes its own random seed by default, these
    # repetitions may produce near-identical ARIs -- confirm against the
    # GraphST constructor.
    for run_idx in range(5):
        # define and train the model on a private copy of the loaded data
        model = GraphST.GraphST(raw_ad.copy(), device=device)
        ad = model.train()
        # clustering
        if tool == 'mclust':
            clustering(ad, n_clusters, radius=radius, method=tool, refinement=True)  # use the optional refinement step
        elif tool in ['leiden', 'louvain']:
            clustering(ad, n_clusters, radius=radius, method=tool, start=0.1, end=2.0, increment=0.01, refinement=False)
        # filter out NA nodes; copy so the .uns write below targets a real
        # AnnData object rather than a view
        ad = ad[~pd.isnull(ad.obs['original_clusters'])].copy()
        # calculate metric ARI
        ARI = metrics.adjusted_rand_score(ad.obs['domain'], ad.obs['original_clusters'])
        ad.uns['ARI'] = ARI
        aris.append(ARI)
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    # append one line: label followed by the space-separated ARIs
    with open('graphst_aris.txt', 'a+') as fp:
        fp.write('mVC ')
        fp.write(' '.join([str(i) for i in aris]))
        fp.write('\n')
[ ]:
"""mPFC"""
# Benchmark GraphST on the STARmap mouse PFC samples. Each entry is
# [number of clusters, sample id].
setting_combinations = [[4, '20180417_BZ5_control'], [4, '20180419_BZ9_control'], [4, '20180424_BZ14_control']]
# set radius to specify the number of neighbors considered during refinement
radius = 50
tool = 'mclust'  # mclust, leiden, and louvain
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    dir_ = './benchmarking_data/STARmap_mouse_PFC'
    # Keep the loaded sample untouched so every repetition starts from the
    # same input instead of the trained/filtered output of the previous run.
    raw_ad = load_mPFC(root_dir=dir_, section_id=dataset)
    aris = []
    # NOTE(review): if GraphST fixes its own random seed by default, these
    # repetitions may produce near-identical ARIs -- confirm against the
    # GraphST constructor.
    for run_idx in range(5):
        # define and train the model on a private copy of the loaded data
        model = GraphST.GraphST(raw_ad.copy(), device=device)
        ad = model.train()
        # clustering
        if tool == 'mclust':
            clustering(ad, n_clusters, radius=radius, method=tool, refinement=True)  # use the optional refinement step
        elif tool in ['leiden', 'louvain']:
            clustering(ad, n_clusters, radius=radius, method=tool, start=0.1, end=2.0, increment=0.01, refinement=False)
        # filter out NA nodes; copy so the .uns write below targets a real
        # AnnData object rather than a view
        ad = ad[~pd.isnull(ad.obs['original_clusters'])].copy()
        # calculate metric ARI
        ARI = metrics.adjusted_rand_score(ad.obs['domain'], ad.obs['original_clusters'])
        ad.uns['ARI'] = ARI
        aris.append(ARI)
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    # append one line per sample: label followed by the space-separated ARIs
    with open('graphst_aris.txt', 'a+') as fp:
        fp.write('mPFC' + dataset + ' ')
        fp.write(' '.join([str(i) for i in aris]))
        fp.write('\n')
4. mHypothalamus dataset
[ ]:
"""mHypo"""
# Benchmark GraphST on the mouse hypothalamus sections. Each entry is
# [number of clusters, section id].
setting_combinations = [[8, '-0.04'], [8, '-0.09'], [8, '-0.14'], [8, '-0.19'], [8, '-0.24'], [8, '-0.29']]
# set radius to specify the number of neighbors considered during refinement
radius = 50
tool = 'mclust'  # mclust, leiden, and louvain
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    dir_ = './benchmarking_data/mHypothalamus'
    # Keep the loaded section untouched so every repetition starts from the
    # same input instead of the trained/filtered output of the previous run.
    raw_ad = load_mHypothalamus(root_dir=dir_, section_id=dataset)
    aris = []
    # NOTE(review): if GraphST fixes its own random seed by default, these
    # repetitions may produce near-identical ARIs -- confirm against the
    # GraphST constructor.
    for run_idx in range(5):
        # define and train the model on a private copy of the loaded data
        model = GraphST.GraphST(raw_ad.copy(), device=device)
        ad = model.train()
        # clustering
        if tool == 'mclust':
            clustering(ad, n_clusters, radius=radius, method=tool, refinement=True)  # use the optional refinement step
        elif tool in ['leiden', 'louvain']:
            clustering(ad, n_clusters, radius=radius, method=tool, start=0.1, end=2.0, increment=0.01, refinement=False)
        # filter out NA nodes; copy so the .uns write below targets a real
        # AnnData object rather than a view
        ad = ad[~pd.isnull(ad.obs['original_clusters'])].copy()
        # calculate metric ARI
        ARI = metrics.adjusted_rand_score(ad.obs['domain'], ad.obs['original_clusters'])
        ad.uns['ARI'] = ARI
        aris.append(ARI)
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    # append one line per section: label followed by the space-separated ARIs
    with open('graphst_aris.txt', 'a+') as fp:
        fp.write('mHypothalamus' + dataset + ' ')
        fp.write(' '.join([str(i) for i in aris]))
        fp.write('\n')
5. Her2Tumor dataset
[ ]:
"""Her2"""
# Benchmark GraphST on the Her2 tumor sections. Each entry is
# [number of clusters, section id].
setting_combinations = [[6, 'A1'], [5, 'B1'], [4, 'C1'], [4, 'D1'], [4, 'E1'], [4, 'F1'], [7, 'G2'], [7, 'H1']]
# set radius to specify the number of neighbors considered during refinement
radius = 50
tool = 'mclust'  # mclust, leiden, and louvain
for setting_combi in setting_combinations:
    n_clusters, dataset = setting_combi
    dir_ = './benchmarking_data/Her2_tumor'
    # Keep the loaded section untouched so every repetition starts from the
    # same input instead of the trained/filtered output of the previous run.
    raw_ad = load_her2_tumor(root_dir=dir_, section_id=dataset)
    aris = []
    # NOTE(review): if GraphST fixes its own random seed by default, these
    # repetitions may produce near-identical ARIs -- confirm against the
    # GraphST constructor.
    for run_idx in range(5):
        # define and train the model on a private copy of the loaded data
        model = GraphST.GraphST(raw_ad.copy(), device=device)
        ad = model.train()
        # clustering
        if tool == 'mclust':
            clustering(ad, n_clusters, radius=radius, method=tool, refinement=True)  # use the optional refinement step
        elif tool in ['leiden', 'louvain']:
            clustering(ad, n_clusters, radius=radius, method=tool, start=0.1, end=2.0, increment=0.01, refinement=False)
        # filter out NA nodes; copy so the .uns write below targets a real
        # AnnData object rather than a view
        ad = ad[~pd.isnull(ad.obs['original_clusters'])].copy()
        # calculate metric ARI
        ARI = metrics.adjusted_rand_score(ad.obs['domain'], ad.obs['original_clusters'])
        ad.uns['ARI'] = ARI
        aris.append(ARI)
    print('Dataset:', dataset)
    print(aris)
    print(np.mean(aris))
    # append one line per section: label followed by the space-separated ARIs
    with open('graphst_aris.txt', 'a+') as fp:
        fp.write('Her2tumor' + dataset + ' ')
        fp.write(' '.join([str(i) for i in aris]))
        fp.write('\n')