Spaces:
Running
Running
import tensorflow as tf | |
import pandas as pd | |
import numpy as np | |
from operator import add | |
from functools import reduce | |
# Configure GPUs: enable memory growth on every physical GPU, then make only
# the first GPU visible to TensorFlow (no-op on machines without a GPU).
_gpus = tf.config.list_physical_devices('GPU')
for gpu in _gpus:
    tf.config.experimental.set_memory_growth(gpu, enable=True)
if _gpus:
    tf.config.experimental.set_visible_devices(_gpus[0], 'GPU')
# One-hot code for each nucleotide; the four positions follow A, C, G, T order.
ntmap = dict(
    A=(1, 0, 0, 0),
    C=(0, 1, 0, 0),
    G=(0, 0, 1, 0),
    T=(0, 0, 0, 1),
)

# Symbol-to-integer code consumed by get_epicode: 'A' -> 1, 'N' -> 0.
# NOTE(review): presumably 'A' marks a present/active epigenetic feature and
# 'N' its absence -- confirm against the .episgt format description.
epimap = dict(A=1, N=0)
def get_seqcode(seq):
    """One-hot encode a nucleotide string using ``ntmap``.

    The sequence is upper-cased first, so lookup is case-insensitive.

    Args:
        seq: String made of the characters known to ``ntmap`` (A, C, G, T).

    Returns:
        Integer array of shape ``(1, len(seq), 4)``: one batch entry, one row
        per nucleotide, one column per ``ntmap`` position.

    Raises:
        KeyError: If ``seq`` contains a character that is not in ``ntmap``.
        TypeError: If ``seq`` is empty (``reduce`` has nothing to fold).
    """
    per_base_codes = (ntmap[base] for base in seq.upper())
    # Adding tuples concatenates them, giving one flat tuple of 4 * len(seq) ints.
    flat_code = reduce(add, per_base_codes)
    target_shape = (1, len(seq), -1)
    return np.array(flat_code).reshape(target_shape)
def get_epicode(eseq):
    """Encode a string of feature symbols using ``epimap``.

    Args:
        eseq: String whose characters are all keys of ``epimap``
            (lookup is case-sensitive).

    Returns:
        Integer array of shape ``(1, len(eseq), 1)``.

    Raises:
        KeyError: If ``eseq`` contains a symbol that is not in ``epimap``.
    """
    codes = [epimap[symbol] for symbol in eseq]
    return np.array(codes).reshape(1, len(eseq), -1)
class Episgt:
    """Loader for a headerless, tab-separated sgRNA table.

    Only the trailing columns of the file are used, in this order: one
    sequence column, ``num_epi_features`` feature-string columns and, when
    ``with_y`` is true, one final label column. Any leading columns are kept
    in ``_ori_df`` but ignored otherwise.
    """

    def __init__(self, fpath, num_epi_features, with_y=True):
        """Read the table and select the trailing columns that hold the data.

        Args:
            fpath: Path (or anything ``pandas.read_csv`` accepts) of the file.
            num_epi_features: Number of feature-string columns after the
                sequence column.
            with_y: Whether the last column holds labels.
        """
        self._fpath = fpath
        self._ori_df = pd.read_csv(fpath, sep='\t', index_col=None, header=None)
        self._num_epi_features = num_epi_features
        self._with_y = with_y

        # One sequence column, the feature columns and, optionally, the label.
        wanted = num_epi_features + 1
        if with_y:
            wanted = num_epi_features + 2
        self._num_cols = wanted

        all_columns = list(self._ori_df.columns)
        self._cols = all_columns[-wanted:]
        self._df = self._ori_df[self._cols]

    def length(self):
        """Return the number of rows in the table."""
        return len(self._df)

    def get_dataset(self, x_dtype=np.float32, y_dtype=np.float32):
        """Encode the table into model-ready arrays.

        All sequences and feature strings must have the same length ``L`` so
        that the per-row encodings can be stacked.

        Args:
            x_dtype: dtype of the returned feature array.
            y_dtype: dtype of the returned label array.

        Returns:
            ``x`` of shape ``(rows, 4 + num_epi_features, L)``, and, when the
            loader was built with ``with_y=True``, the tuple ``(x, y)`` where
            ``y`` holds the values of the last column.
        """
        seq_col = self._cols[0]
        epi_cols = self._cols[1: 1 + self._num_epi_features]

        # (rows, L, 4): one-hot nucleotides stacked along the batch axis.
        seq_part = np.concatenate([get_seqcode(s) for s in self._df[seq_col]])

        # Each feature column gives (rows, L, 1); join them on the last axis.
        epi_parts = [
            np.concatenate([get_epicode(e) for e in self._df[col]])
            for col in epi_cols
        ]
        epi_part = np.concatenate(epi_parts, axis=-1)

        features = np.concatenate([seq_part, epi_part], axis=-1).astype(x_dtype)
        # Reorder to (rows, channels, L).
        features = features.transpose(0, 2, 1)

        if not self._with_y:
            return features
        labels = np.array(self._df[self._cols[-1]]).astype(y_dtype)
        return features, labels
from keras.models import load_model | |
class DCModelOntar:
    """Thin wrapper around a saved Keras on-target model."""

    def __init__(self, ontar_model_dir, is_reg=False):
        """Load the model stored at ``ontar_model_dir``.

        Args:
            ontar_model_dir: Path handed to ``load_model``.
            is_reg: Accepted for interface compatibility; the model is loaded
                the same way whatever its value.
        """
        self.model = load_model(ontar_model_dir)

    def ontar_predict(self, x, channel_first=True):
        """Run the model on ``x`` and return the predictions flattened to 1-D.

        Args:
            x: 4-D input array.
            channel_first: When true, axis 1 of ``x`` is moved to the last
                position before prediction; otherwise ``x`` is used as given.

        Returns:
            The output of ``model.predict`` after ``ravel()``.
        """
        model_input = x.transpose([0, 2, 3, 1]) if channel_first else x
        return self.model.predict(model_input).ravel()
def predict(file_path='eg_cls_on_target.episgt', model_path='on-cla.h5'):
    """Score every sgRNA in an ``.episgt`` file with a saved on-target model.

    The input file and the model were previously hard-coded; they are now
    parameters whose defaults reproduce the original behaviour, so calling
    ``predict()`` with no arguments still works.

    Args:
        file_path: Tab-separated input file with four feature-string columns
            and a trailing label column (read through ``Episgt``).
        model_path: Saved Keras model passed to ``DCModelOntar``.

    Returns:
        1-D array with one prediction per input row.
    """
    input_data = Episgt(file_path, num_epi_features=4, with_y=True)
    # Labels are present in the file but are not needed for inference.
    x, _ = input_data.get_dataset()
    # Insert a singleton axis so x is 4-D: (rows, 8, 1, L). The original note
    # gives [100, 8, 1, 23], presumably for the default example file.
    x = np.expand_dims(x, axis=2)
    dc_model = DCModelOntar(model_path)
    return dc_model.ontar_predict(x)