# Source code for pipeline.code.dnn.pipeline

# pipeline.py
import os
import numpy as np
from tqdm import tqdm
from pipeline.code.dnn.config.logger import get_logger 
from pipeline.code.dnn.models import train_dnn
from pipeline.code.dnn.dataloader import generate_matrix
from pipeline.code.dnn.analyzer import transform_human_mouse,independent_test,train_test_split
from sklearn.preprocessing import StandardScaler
 
class DnnPipeline:
    """
    Deep Neural Network (DNN) model training and evaluation pipeline.

    This class implements a full pipeline for training, cross-species
    transformation, and independent test of DNN models on transcriptional
    data.

    Parameters
    ----------
    config : object
        Configuration object containing paths, parameters, and flags to
        control the pipeline. The attributes read by this class are
        ``train``, ``trans``, ``independent_test``, ``repeat_n``,
        ``valid_ratio``, ``shuffle`` and the mapping ``data_files`` with
        keys ``'save_path'``, ``'data_path'`` and
        ``'independent_data_path'``.

    Attributes
    ----------
    cfg : object
        Configuration object.
    logger : logging.Logger
        Logger instance (obtained from ``get_logger()``) for tracking
        pipeline progress and information.

    Methods
    -------
    execute()
        Execute the pipeline based on configuration flags: training,
        transformation, or testing.
    train()
        Run training procedure.
    trans()
        Perform cross-species transformation of transcriptional data.
    test()
        Runs independent training and test.
    _process_single_dataset(data_path)
        Preprocess and train model on independent dataset.
    _train_single_model(train_data, valid_data, iteration, data_path)
        Train a model with given data split.
    """

    def __init__(self, config) -> None:
        self.cfg = config
        # The output directory is created before the logger is requested.
        self._setup_directories()
        self.logger = get_logger()

    def _setup_directories(self) -> None:
        """Create the ``data_files['save_path']`` directory if it is missing."""
        os.makedirs(self.cfg.data_files['save_path'], exist_ok=True)

    def execute(self) -> None:
        """
        Run every stage whose configuration flag is set.

        The stages are checked independently and in a fixed order
        (``cfg.train``, ``cfg.trans``, ``cfg.independent_test``), so several
        of them can run in a single call.
        """
        if self.cfg.train:
            self.logger.info("Starting training")
            self.train()

        if self.cfg.trans:
            self.logger.info("Starting transformation")
            self.trans()

        if self.cfg.independent_test:
            self.logger.info("Starting independent testing")
            self.test()

    def train(self) -> None:
        """Train on the dataset located at ``cfg.data_files['data_path']``."""
        data_path = self.cfg.data_files['data_path']
        self.logger.info(f"Training dataset: {data_path}")
        self._process_single_dataset(data_path)

    def trans(self) -> None:
        """Run ``transform_human_mouse`` with the pipeline configuration."""
        self.logger.info("Performing cross-species transformation")
        transform_human_mouse(self.cfg)

    def test(self) -> None:
        """
        Train on the independent dataset, then run ``independent_test``.

        The dataset is read from ``cfg.data_files['independent_data_path']``.
        """
        self.logger.info("Starting independent training")
        self._process_single_dataset(self.cfg.data_files['independent_data_path'])
        self.logger.info("Starting independent testing")
        independent_test(params=self.cfg)

    def _process_single_dataset(self, data_path) -> None:
        """
        Load a dataset and train one model per repeat on a fresh split.

        For each of the ``cfg.repeat_n`` repeats the combined matrix is
        split into training and validation parts, the feature columns are
        standardized, and the result is handed to ``_train_single_model``.

        Parameters
        ----------
        data_path : object
            Dataset location, forwarded unchanged to ``generate_matrix`` and
            ``train_dnn``.
        """
        self.logger.info("Loading dataset")
        # The third value returned by generate_matrix is not used here.
        X, y, _, sample_id, all_id = generate_matrix(data_path, self.cfg)
        # Features and labels are joined column-wise; the slicing below
        # treats the last column as the label.
        # NOTE(review): assumes y contributes exactly one column -- confirm.
        dataset = np.concatenate((X, y), axis=1)
        self.logger.info(f"Dataset shape: {X.shape}")

        for repeat_i in tqdm(range(self.cfg.repeat_n)):
            self.logger.info(f"Training iteration {repeat_i+1}/{self.cfg.repeat_n}")

            # train_test_split is the project's own helper (imported from
            # the analyzer module), not scikit-learn's.
            train, valid = train_test_split(
                dataset,
                valid_size=self.cfg.valid_ratio,
                sample_id=sample_id,
                all_id=all_id,
            )

            # The scaler is fitted on the training features only and then
            # applied to the validation features.
            scaler = StandardScaler()
            train_features_scaled = scaler.fit_transform(train[:, :-1])
            valid_features_scaled = scaler.transform(valid[:, :-1])

            train_labels = train[:, -1].reshape(-1, 1)
            if self.cfg.shuffle:
                # Randomly reorders the training labels relative to their
                # features (presumably a label-permutation control --
                # confirm). Validation labels are never shuffled.
                train_labels = np.random.permutation(train_labels)
            valid_labels = valid[:, -1].reshape(-1, 1)

            train_scaled = np.hstack((train_features_scaled, train_labels))
            valid_scaled = np.hstack((valid_features_scaled, valid_labels))

            self._train_single_model(train_scaled, valid_scaled, repeat_i, data_path)

    def _train_single_model(self, train_data, valid_data, iteration, data_path) -> None:
        """
        Train one model on a prepared split by delegating to ``train_dnn``.

        Parameters
        ----------
        train_data : numpy.ndarray
            Scaled training features with the label as the last column.
        valid_data : numpy.ndarray
            Scaled validation features with the label as the last column.
        iteration : int
            Zero-based index of the current repeat.
        data_path : object
            Dataset location, forwarded to ``train_dnn``.
        """
        train_dnn(
            params=self.cfg,
            train_data=train_data,
            valid_data=valid_data,
            iteration=iteration,
            data_path=data_path,
        )