"""Helpers for folding RNA sequences with the ``RNA`` bindings and summarising them."""
import functools
import typing as ty

import pandas as pd
import RNA


@functools.lru_cache()
def duplex_energy(s1: str, s2: str) -> float:
    """Return the energy of the duplex formed by two sequences.

    The result is memoised per ``(s1, s2)`` pair.

    Parameters
    ----------
    s1 : str
        The first sequence.
    s2 : str
        The second sequence.

    Returns
    -------
    float
        The ``energy`` attribute of ``RNA.duplexfold(s1, s2)``.
    """
    return RNA.duplexfold(s1, s2).energy


@functools.lru_cache()
def folded_sequence(sequence: str, model_details: "RNA.md") -> ty.Tuple[str, float]:
    """Fold a single sequence and return its structure and energy.

    The result is memoised on ``(sequence, model_details)``.

    Parameters
    ----------
    sequence : str
        The sequence to fold.
    model_details : RNA.md
        The model details handed to ``RNA.fold_compound``.

    Returns
    -------
    tuple
        The two values returned by ``fold_compound.mfe()``: the dot-bracket
        structure and its minimum free energy.
    """
    # NOTE(review): the cache key includes ``model_details``. Presumably the
    # object hashes by identity, so hits only happen for the very same
    # instance, and later mutation of that instance would not invalidate
    # cached entries -- confirm against the RNA bindings.
    folder = RNA.fold_compound(sequence, model_details)
    dot_bracket, mfe = folder.mfe()
    return dot_bracket, mfe


def fold_sequences(
    sequences: ty.Iterable[str],
    temperature: float = 37.0,
) -> pd.DataFrame:
    """Fold every sequence at the given temperature and tabulate the results.

    Parameters
    ----------
    sequences : Iterable[str]
        The sequences to fold.
    temperature : float
        Value assigned to ``RNA.md().temperature`` before folding
        (presumably degrees Celsius -- confirm against the RNA bindings).

    Returns
    -------
    pd.DataFrame
        Indexed by ``sequence`` with the columns ``structure_<T>`` and
        ``mfe_<T>``, where ``<T>`` is ``int(temperature)``. Fractional
        temperatures are therefore truncated in the column names only
        (e.g. 37.5 -> ``structure_37``).
    """
    md = RNA.md()
    md.temperature = temperature

    # Column names depend only on the temperature, so build them once
    # instead of re-formatting them on every loop iteration.
    temperature_tag = int(temperature)
    structure_col = f"structure_{temperature_tag}"
    mfe_col = f"mfe_{temperature_tag}"

    seq2structure_map = {
        "sequence": [],
        structure_col: [],
        mfe_col: [],
    }
    for sequence in sequences:
        dot_bracket, mfe = folded_sequence(sequence, md)
        seq2structure_map["sequence"].append(sequence)
        seq2structure_map[structure_col].append(dot_bracket)
        seq2structure_map[mfe_col].append(mfe)
    return pd.DataFrame(seq2structure_map).set_index("sequence")


def fraction(seq: str, nucleoids: str) -> float:
    """Computes the fraction of the sequence string that is the set of nucleoids given.

    Parameters
    ----------
    seq : str
        The sequence string
    nucleoids : str
        The nucleoids to compute the fraction for. Repeated entries are
        counted once, so the result never double-counts a position.

    Returns
    -------
    float
        The fraction, or ``0.0`` when ``seq`` is empty.
    """
    # An empty sequence contains none of the requested nucleoids; return 0.0
    # rather than failing with ZeroDivisionError.
    if not seq:
        return 0.0
    # De-duplicate so that e.g. nucleoids="GCG" does not count "G" twice.
    matches = sum(seq.count(n) for n in set(nucleoids))
    return matches / len(seq)