|
|
|
import functools |
|
import typing as ty |
|
|
|
import pandas as pd |
|
import RNA |
|
|
|
|
|
@functools.lru_cache()
def duplex_energy(s1: str, s2: str) -> float:
    """Return the ``energy`` attribute of ``RNA.duplexfold(s1, s2)``.

    Results are memoized by ``functools.lru_cache`` (default settings), so
    calling again with the same arguments does not invoke ``RNA.duplexfold``
    a second time while the entry is still cached.

    Parameters
    ----------
    s1 : str
        First sequence handed to ``RNA.duplexfold``.
    s2 : str
        Second sequence handed to ``RNA.duplexfold``.

    Returns
    -------
    float
        The energy reported on the duplex fold result.
    """
    duplex = RNA.duplexfold(s1, s2)
    return duplex.energy
|
|
|
|
|
@functools.lru_cache()
def folded_sequence(sequence, model_details):
    """Fold ``sequence`` and return its minimum-free-energy result.

    Builds an ``RNA.fold_compound`` from the sequence and the given model
    details, then calls its ``mfe()`` method. Results are memoized by
    ``functools.lru_cache`` on the ``(sequence, model_details)`` pair.

    NOTE(review): the cache key includes ``model_details``. If that object
    hashes by identity (presumably the case for ``RNA.md`` -- confirm), a
    freshly created model never reuses entries stored under an earlier one,
    and mutating a model after it was used here would keep serving the
    previously cached result.

    Parameters
    ----------
    sequence
        The sequence passed to ``RNA.fold_compound``.
    model_details
        The model-details object passed to ``RNA.fold_compound``; must be
        hashable because it is part of the cache key.

    Returns
    -------
    tuple
        ``(dot_bracket, mfe)`` as unpacked from ``fold_compound.mfe()``.
    """
    structure, energy = RNA.fold_compound(sequence, model_details).mfe()
    return structure, energy
|
|
|
|
|
def fold_sequences(
    sequences: ty.Iterable[str], temperature: float = 37.0,
) -> pd.DataFrame:
    """Fold every sequence at ``temperature`` and tabulate the results.

    A new ``RNA.md`` model is created, its ``temperature`` attribute is set,
    and each sequence is folded through ``folded_sequence`` with that model.

    Parameters
    ----------
    sequences : ty.Iterable[str]
        Sequences to fold. The iterable is consumed exactly once.
    temperature : float
        Value assigned to the model's ``temperature``. The column names use
        ``int(temperature)``, so e.g. ``37.5`` yields ``structure_37`` and
        ``mfe_37``.

    Returns
    -------
    pd.DataFrame
        Indexed by ``sequence`` with the columns ``structure_<T>`` and
        ``mfe_<T>``, where ``<T>`` is ``int(temperature)``.
    """
    model = RNA.md()
    model.temperature = temperature

    # Column names depend only on the temperature, so build them once.
    label = int(temperature)
    structure_column = f"structure_{label}"
    mfe_column = f"mfe_{label}"

    # Single pass over the input: pair each sequence with its fold result.
    folds = [(seq, folded_sequence(seq, model)) for seq in sequences]

    table = pd.DataFrame(
        {
            "sequence": [seq for seq, _ in folds],
            structure_column: [result[0] for _, result in folds],
            mfe_column: [result[1] for _, result in folds],
        }
    )
    return table.set_index("sequence")
|
|
|
def fraction(seq: str, nucleoids: str) -> float:
    """Compute the fraction of ``seq`` made up of the given nucleoids.

    Parameters
    ----------
    seq : str
        The sequence string.
    nucleoids : str
        The characters to count. Treated as a set: a character that appears
        more than once here is still counted only once.

    Returns
    -------
    float
        Number of positions in ``seq`` holding one of ``nucleoids``, divided
        by ``len(seq)``. Returns ``0.0`` for an empty ``seq`` instead of
        raising ``ZeroDivisionError``.
    """
    if not seq:
        # Nothing to count; avoid dividing by zero.
        return 0.0
    # De-duplicate so that e.g. "GG" does not count every G twice.
    matches = sum(seq.count(n) for n in set(nucleoids))
    return matches / len(seq)