CAMEX aligns various development stages of seven organs across seven different species
This tutorial demonstrates how CAMEX aligns the developmental stages of seven organs across seven species.
Here, we use RNA-seq data from seven organs across seven species at various developmental stages. The processed h5ad files can be downloaded from https://drive.google.com/drive/folders/1rwdjEvWFEFw82a0x2JzMi2jXICbUc5eb?usp=sharing
[1]:
import warnings
# Ignore every warning from here on (presumably to keep the tutorial output uncluttered).
warnings.filterwarnings("ignore")
[2]:
import os
import time
import torch
import shutil
import warnings
import argparse
import importlib
import scanpy as sc
import pandas as pd
import numpy as mp
from CAMEX.base import Dataset
from CAMEX.trainer import Trainer
[2]:
from params import PARAMS
[3]:
# Wall-clock start time; compared with t2 later to report the total run time.
t1 = time.time()
make log dir
[4]:
# Timestamp that identifies this run; it is also the name of the log sub-directory.
time_start = time.strftime("%Y-%m-%d-%H-%M-%S")
log_path = f'./log/{time_start}/'

# Record the run's timestamp and log directory in every parameter group.
# Only the groups themselves are modified, so iterate over the values directly.
for section in PARAMS.values():
    section['time_start'] = time_start
    section['log_path'] = log_path

# Printed once, after the loop (the notebook shows a single line of output).
print(log_path)
./log/2024-04-12-16-58-15/
[5]:
# Create the run's log directory; an already existing directory is not an error.
os.makedirs(log_path, exist_ok=True)

# Keep a snapshot of the parameter file alongside the run's outputs.
params_snapshot = log_path + 'params_current.py'
shutil.copy('params.py', params_snapshot)

print(f'time: {time_start}')
time: 2024-04-12-16-58-15
Preprocess scRNA-seq data to construct a heterogeneous graph of cells and genes
[6]:
# ---------------------------------- Step 1: preprocess ----------------------------------
print('start preprocess')
# Build the dataset from the 'preprocess' parameter group.
# NOTE(review): per the section heading this is where the heterogeneous graph is
# constructed; the constructor also prints the per-species sample table — confirm in CAMEX.base.
dataset = Dataset(**PARAMS['preprocess'])
# Keep the two dataset attributes that are passed to the Trainer below.
adata_CAMEX = dataset.adata_whole
dgl_data = dataset.dgl_data
start preprocess
raw-all-human-Margarida: reference raw-all-macaque-Margarida: query raw-all-mouse-Margarida: query raw-all-rat-Margarida: query raw-all-rabbit-Margarida: query raw-all-opossum-Margarida: query raw-all-chicken-Margarida: query
Cerebellum 58 16.0 43 53 45 42 36
Brain 53 26.0 55 65 57 42 36
Liver 49 34.0 57 60 55 43 36
Heart 44 28.0 55 65 56 39 37
Testis 39 28.0 27 30 28 22 17
Kidney 36 33.0 51 48 48 24 35
Ovary 18 NaN 28 29 26 20 18
integration
[7]:
print('start train')
# Construct the trainer from the preprocessed AnnData object and the dataset's
# dgl_data, configured by the 'train' parameter group.
trainer = Trainer(adata_CAMEX, dgl_data, **PARAMS['train'])
start train
[8]:
# Run the integration training; the loss is printed once per epoch (see the output below).
# NOTE(review): the resulting embedding appears to be stored in
# adata_CAMEX.obsm['X_CAMEX_Integration'] (it is present after reloading) — confirm in Trainer.
trainer.integration()
--------------------------------------------- integration ---------------------------------------------
epoch: 0, loss: 4607.58544921875
epoch: 1, loss: 2490.171142578125
epoch: 2, loss: 1467.8280029296875
epoch: 3, loss: 954.9205932617188
epoch: 4, loss: 640.1492309570312
epoch: 5, loss: 433.93695068359375
epoch: 6, loss: 300.9430847167969
epoch: 7, loss: 214.95994567871094
epoch: 8, loss: 159.0083770751953
epoch: 9, loss: 121.76605224609375
epoch: 10, loss: 98.37760162353516
epoch: 11, loss: 82.01410675048828
epoch: 12, loss: 73.05314636230469
epoch: 13, loss: 66.70439910888672
epoch: 14, loss: 63.745574951171875
epoch: 15, loss: 62.407020568847656
epoch: 16, loss: 60.99980926513672
epoch: 17, loss: 60.73827362060547
epoch: 18, loss: 60.06874465942383
epoch: 19, loss: 59.088157653808594
epoch: 20, loss: 59.054168701171875
epoch: 21, loss: 59.44342041015625
epoch: 22, loss: 58.84150695800781
epoch: 23, loss: 58.24412536621094
epoch: 24, loss: 58.91285705566406
epoch: 25, loss: 58.658206939697266
epoch: 26, loss: 57.68967819213867
epoch: 27, loss: 57.58855438232422
epoch: 28, loss: 57.252845764160156
epoch: 29, loss: 56.78086853027344
epoch: 30, loss: 56.5086555480957
epoch: 31, loss: 56.173423767089844
epoch: 32, loss: 55.87213897705078
epoch: 33, loss: 55.640342712402344
epoch: 34, loss: 55.28804016113281
epoch: 35, loss: 55.35758972167969
epoch: 36, loss: 54.749366760253906
epoch: 37, loss: 54.94860076904297
epoch: 38, loss: 54.805625915527344
epoch: 39, loss: 54.447227478027344
epoch: 40, loss: 54.16265106201172
epoch: 41, loss: 54.158287048339844
epoch: 42, loss: 54.37677001953125
epoch: 43, loss: 53.94585037231445
epoch: 44, loss: 54.1090202331543
epoch: 45, loss: 53.78874206542969
epoch: 46, loss: 53.840911865234375
epoch: 47, loss: 53.45828628540039
epoch: 48, loss: 53.54559326171875
epoch: 49, loss: 53.48744583129883
epoch: 50, loss: 53.3355712890625
epoch: 51, loss: 53.18788146972656
epoch: 52, loss: 53.66065979003906
epoch: 53, loss: 53.202171325683594
epoch: 54, loss: 53.10791015625
epoch: 55, loss: 52.801490783691406
epoch: 56, loss: 53.080421447753906
epoch: 57, loss: 52.98455047607422
epoch: 58, loss: 52.86189270019531
epoch: 59, loss: 52.87925338745117
epoch: 60, loss: 52.553199768066406
epoch: 61, loss: 52.749717712402344
epoch: 62, loss: 52.56831359863281
epoch: 63, loss: 52.50804138183594
epoch: 64, loss: 52.300193786621094
epoch: 65, loss: 52.411033630371094
epoch: 66, loss: 52.36540222167969
epoch: 67, loss: 51.95488739013672
epoch: 68, loss: 52.149803161621094
epoch: 69, loss: 52.06043243408203
epoch: 70, loss: 51.96537399291992
epoch: 71, loss: 51.77614974975586
epoch: 72, loss: 51.99458694458008
epoch: 73, loss: 51.72344207763672
epoch: 74, loss: 51.66328430175781
epoch: 75, loss: 51.61555480957031
epoch: 76, loss: 51.4632453918457
epoch: 77, loss: 51.57242202758789
epoch: 78, loss: 51.53713607788086
epoch: 79, loss: 51.412540435791016
epoch: 80, loss: 51.20708465576172
epoch: 81, loss: 51.246883392333984
epoch: 82, loss: 51.06980895996094
epoch: 83, loss: 51.19629669189453
epoch: 84, loss: 51.29935836791992
epoch: 85, loss: 51.036376953125
epoch: 86, loss: 51.149208068847656
epoch: 87, loss: 51.34392547607422
epoch: 88, loss: 50.999691009521484
epoch: 89, loss: 50.881141662597656
epoch: 90, loss: 50.71076202392578
epoch: 91, loss: 50.899452209472656
epoch: 92, loss: 50.86601257324219
epoch: 93, loss: 50.66194152832031
epoch: 94, loss: 50.56133270263672
epoch: 95, loss: 50.62682342529297
epoch: 96, loss: 50.681758880615234
epoch: 97, loss: 50.54673385620117
epoch: 98, loss: 50.34874725341797
epoch: 99, loss: 50.592742919921875
epoch: 100, loss: 50.18242263793945
epoch: 101, loss: 50.17028045654297
epoch: 102, loss: 50.567806243896484
epoch: 103, loss: 50.15935134887695
epoch: 104, loss: 50.1728515625
epoch: 105, loss: 50.25212860107422
epoch: 106, loss: 50.06218719482422
epoch: 107, loss: 50.37501907348633
epoch: 108, loss: 50.11683654785156
epoch: 109, loss: 49.974098205566406
epoch: 110, loss: 50.1690673828125
epoch: 111, loss: 50.06644058227539
epoch: 112, loss: 50.10094451904297
epoch: 113, loss: 49.878387451171875
epoch: 114, loss: 49.720916748046875
epoch: 115, loss: 49.83085632324219
epoch: 116, loss: 49.81055450439453
epoch: 117, loss: 49.8421745300293
epoch: 118, loss: 49.58333969116211
epoch: 119, loss: 49.72449493408203
epoch: 120, loss: 49.84004211425781
epoch: 121, loss: 49.77716827392578
epoch: 122, loss: 49.78533935546875
epoch: 123, loss: 49.6264533996582
epoch: 124, loss: 49.715667724609375
epoch: 125, loss: 49.65956115722656
epoch: 126, loss: 49.5895881652832
epoch: 127, loss: 49.5565299987793
epoch: 128, loss: 49.60377502441406
epoch: 129, loss: 49.33171081542969
epoch: 130, loss: 49.378379821777344
epoch: 131, loss: 49.55915832519531
epoch: 132, loss: 49.65848159790039
epoch: 133, loss: 49.39168167114258
epoch: 134, loss: 49.212318420410156
epoch: 135, loss: 49.26879119873047
epoch: 136, loss: 49.477081298828125
epoch: 137, loss: 49.137046813964844
epoch: 138, loss: 49.35836410522461
epoch: 139, loss: 49.23335647583008
epoch: 140, loss: 49.20266342163086
epoch: 141, loss: 49.23248291015625
epoch: 142, loss: 48.91448211669922
epoch: 143, loss: 48.9366340637207
epoch: 144, loss: 49.18742370605469
epoch: 145, loss: 49.29682159423828
epoch: 146, loss: 49.39116287231445
epoch: 147, loss: 49.143043518066406
epoch: 148, loss: 49.30064392089844
epoch: 149, loss: 49.25562286376953
epoch: 150, loss: 49.08995056152344
epoch: 151, loss: 48.94325256347656
epoch: 152, loss: 49.079566955566406
epoch: 153, loss: 48.89521026611328
epoch: 154, loss: 48.93003463745117
epoch: 155, loss: 48.99842834472656
epoch: 156, loss: 48.68363952636719
epoch: 157, loss: 48.928016662597656
epoch: 158, loss: 48.87431335449219
epoch: 159, loss: 48.6668701171875
epoch: 160, loss: 48.762760162353516
epoch: 161, loss: 48.88435363769531
epoch: 162, loss: 48.719482421875
epoch: 163, loss: 48.958465576171875
epoch: 164, loss: 49.04628372192383
epoch: 165, loss: 48.8967170715332
epoch: 166, loss: 48.857810974121094
epoch: 167, loss: 48.84418869018555
epoch: 168, loss: 48.79505920410156
epoch: 169, loss: 48.91645812988281
epoch: 170, loss: 48.71501159667969
epoch: 171, loss: 49.033077239990234
epoch: 172, loss: 48.56593704223633
epoch: 173, loss: 48.55734634399414
epoch: 174, loss: 48.603302001953125
epoch: 175, loss: 48.6343879699707
epoch: 176, loss: 48.58610916137695
epoch: 177, loss: 48.565704345703125
epoch: 178, loss: 48.623085021972656
epoch: 179, loss: 48.864315032958984
epoch: 180, loss: 48.620182037353516
epoch: 181, loss: 48.42325973510742
epoch: 182, loss: 48.576934814453125
epoch: 183, loss: 48.48506546020508
epoch: 184, loss: 48.43476867675781
epoch: 185, loss: 48.340877532958984
epoch: 186, loss: 48.54323959350586
epoch: 187, loss: 48.69573974609375
epoch: 188, loss: 48.468448638916016
epoch: 189, loss: 48.30616760253906
epoch: 190, loss: 48.582618713378906
epoch: 191, loss: 48.498291015625
epoch: 192, loss: 48.609413146972656
epoch: 193, loss: 48.454566955566406
epoch: 194, loss: 48.3724479675293
epoch: 195, loss: 48.208648681640625
epoch: 196, loss: 48.22394561767578
epoch: 197, loss: 48.36925506591797
epoch: 198, loss: 48.47068786621094
epoch: 199, loss: 48.241371154785156
[18]:
# Save the AnnData object into this run's log directory as a gzip-compressed h5ad file.
adata_CAMEX.write_h5ad(log_path + 'adata_CAMEX.h5ad', compression='gzip')
[16]:
# Wall-clock end time for the elapsed-time report.
t2 = time.time()
[17]:
# Report the seconds elapsed between t1 and t2, rounded to an integer.
print(f'time usage: {round(t2-t1)} seconds')
time usage: 214 seconds
[17]:
# NOTE(review): this cell repeats the preceding elapsed-time report (duplicate cell in the export).
print(f'time usage: {round(t2-t1)} seconds')
time usage: 214 seconds
analysis
[10]:
# Display the log directory of this run; the saved results are read back from it below.
log_path
[10]:
'./log/2024-04-12-16-58-15/'
[11]:
# Reload the saved AnnData object from the log directory and display its summary.
adata_CAMEX = sc.read_h5ad(log_path + 'adata_CAMEX.h5ad')
adata_CAMEX
[11]:
AnnData object with n_obs × n_vars = 1890 × 2000
obs: 'meta', 'tissue', 'cell_ontology_class', 'stage', 'sample', 'major_stage', 'batch', 'n_genes_by_counts', 'total_counts', 'cell_ontology_class_num', 'cell_class', 'cell_class_num'
var: 'n_cells_by_counts', 'mean_counts', 'pct_dropout_by_counts', 'total_counts', 'highly_variable', 'means', 'dispersions', 'dispersions_norm', 'highly_variable_nbatches', 'highly_variable_intersection', 'mean', 'std'
uns: 'cell_type', 'data_order', 'dataset_description', 'dataset_type', 'hvg', 'log1p', 'neighbors', 'pca'
obsm: 'X_CAMEX_Integration', 'X_pca'
varm: 'PCs'
layers: 'counts'
obsp: 'connectivities', 'distances'
[12]:
# Shape of the CAMEX integration embedding stored in obsm (observations x embedding dimensions).
adata_CAMEX.obsm['X_CAMEX_Integration'].shape
[12]:
(1890, 128)
[13]:
# Build the neighbor graph on the CAMEX embedding rather than on the default representation.
sc.pp.neighbors(adata_CAMEX, use_rep='X_CAMEX_Integration')
[14]:
# Compute a UMAP embedding of the samples from the neighbor graph stored in adata_CAMEX.
sc.tl.umap(adata_CAMEX)
[15]:
# Draw one UMAP panel per key: colored by 'batch' and by 'cell_ontology_class'.
# wspace sets the horizontal space between the two panels.
sc.pl.umap(adata_CAMEX, color=['batch', 'cell_ontology_class'], wspace=0.6)