Yak-hbdx's picture
uploaded TransfoRNA repo
0b11a42 verified
raw
history blame
1.44 kB
import functools
import typing as ty
import pandas as pd
import RNA
@functools.lru_cache()
def duplex_energy(s1: str, s2: str) -> float:
    """Return the energy that ``RNA.duplexfold`` reports for two sequences.

    Results are memoized by ``functools.lru_cache`` on the ``(s1, s2)``
    argument pair, so repeated calls with the same pair skip the fold.

    Parameters
    ----------
    s1 : str
        First sequence handed to ``RNA.duplexfold``.
    s2 : str
        Second sequence handed to ``RNA.duplexfold``.

    Returns
    -------
    float
        The ``energy`` attribute of the duplex fold result.
    """
    duplex = RNA.duplexfold(s1, s2)
    return duplex.energy
@functools.lru_cache()
def folded_sequence(sequence, model_details):
    """Fold a single sequence and return its structure and energy.

    Builds an ``RNA.fold_compound`` from ``sequence`` and ``model_details``
    and returns the pair produced by its ``mfe()`` method.

    Results are memoized by ``functools.lru_cache`` on the
    ``(sequence, model_details)`` argument pair, so ``model_details`` must be
    hashable.
    NOTE(review): if the model-details object hashes by identity, a fresh
    object with identical settings will not hit the cache -- confirm against
    the callers.

    Parameters
    ----------
    sequence
        The sequence to fold.
    model_details
        Model settings forwarded unchanged to ``RNA.fold_compound``.

    Returns
    -------
    tuple
        ``(dot_bracket, mfe)`` exactly as unpacked from ``mfe()``.
    """
    compound = RNA.fold_compound(sequence, model_details)
    structure, energy = compound.mfe()
    return structure, energy
def fold_sequences(
    sequences: ty.Iterable[str], temperature: float = 37.0,
) -> pd.DataFrame:
    """Fold every sequence at one temperature and tabulate the results.

    A single ``RNA.md`` object is created, its ``temperature`` attribute is
    set, and it is reused for every call to ``folded_sequence``.

    Parameters
    ----------
    sequences : Iterable[str]
        Sequences to fold; consumed once, in order.
    temperature : float, optional
        Value assigned to the model details' ``temperature``. Its integer
        part is used as the suffix of the output column names.

    Returns
    -------
    pd.DataFrame
        Indexed by ``sequence``, with the columns
        ``structure_<int(temperature)>`` and ``mfe_<int(temperature)>``.
    """
    model = RNA.md()
    model.temperature = temperature

    # Column names depend only on the temperature, so build them once.
    suffix = int(temperature)
    structure_col = f"structure_{suffix}"
    mfe_col = f"mfe_{suffix}"

    # Each entry pairs the input sequence with its (structure, energy) fold.
    folded = [(seq, folded_sequence(seq, model)) for seq in sequences]

    # Dict-of-lists keeps all three columns present even for empty input.
    table = pd.DataFrame(
        {
            "sequence": [seq for seq, _ in folded],
            structure_col: [fold[0] for _, fold in folded],
            mfe_col: [fold[1] for _, fold in folded],
        }
    )
    return table.set_index("sequence")
def fraction(seq: str, nucleoids: str) -> float:
    """Compute the fraction of ``seq`` made up of the given nucleoids.

    Each distinct symbol in ``nucleoids`` is counted once, so passing a
    repeated symbol (e.g. ``"GG"``) does not inflate the result.

    Parameters
    ----------
    seq : str
        The sequence string.
    nucleoids : str
        The symbols whose combined share of ``seq`` is computed.

    Returns
    -------
    float
        Combined occurrences of the symbols divided by ``len(seq)``;
        ``0.0`` when ``seq`` is empty.
    """
    # An empty sequence has no meaningful fraction; report 0.0 instead of
    # failing with ZeroDivisionError.
    if not seq:
        return 0.0
    # De-duplicate so a repeated symbol is not counted more than once.
    matches = sum(seq.count(symbol) for symbol in set(nucleoids))
    return matches / len(seq)