oiv_ld_phenotyping / src /leaf_patch_gen_diff.py
treizh's picture
Upload folder using huggingface_hub (#5)
edacdfe verified
raw
history blame
7.09 kB
import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.regression.linear_model import RegressionResultsWrapper
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from matplotlib.figure import Figure
import seaborn as sns
import panel as pn
import com_const as cc
import com_func as cf
import com_image as ci
stars = [-np.log(0.05), -np.log(0.01), -np.log(0.001), -np.log(0.0001)]
def plot_single_progression(
ax,
df,
target,
title: str,
hue="gen",
style="gen",
show_legend: bool = False,
):
lp = sns.lineplot(
df.sort_values(hue),
x="dpi",
y=target,
hue=hue,
markers=True,
style=style,
dashes=False,
palette="tab10",
markersize=12,
ax=ax,
)
lp.set_yticklabels(["", "3", "", "5", "", "7", "", "9"])
ax.set_title(title)
if show_legend is True:
sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
else:
ax.get_legend().set_visible(False)
def get_model(
df: pd.DataFrame, target: str, formula: str, dpi: int = None
) -> RegressionResultsWrapper:
df_ = df[df.dpi == dpi] if dpi is not None else df
return ols(f"{target} {formula}", data=df_).fit()
def anova_table(aov, add_columns: bool = True):
"""
The function below was created specifically for the one-way ANOVA table
results returned for Type II sum of squares
"""
if add_columns is True:
aov["mean_sq"] = aov[:]["sum_sq"] / aov[:]["df"]
aov["eta_sq"] = aov[:-1]["sum_sq"] / sum(aov["sum_sq"])
aov["omega_sq"] = (
aov[:-1]["sum_sq"] - (aov[:-1]["df"] * aov["mean_sq"][-1])
) / (sum(aov["sum_sq"]) + aov["mean_sq"][-1])
cols = ["sum_sq", "df", "mean_sq", "F", "PR(>F)", "eta_sq", "omega_sq"]
aov = aov[cols]
return aov
def plot_assumptions(models: list, titles: list, figsize=(12, 4)):
fig = Figure(figsize=figsize)
fig.suptitle("Probability plot of model residual's", fontsize="x-large")
axii = fig.subplots(1, len(models))
for ax, model, title in zip(axii, models, titles):
_ = stats.probplot(model.resid, plot=ax, rvalue=True)
ax.set_title(title)
return fig
def hghlight_rejection(s):
df = pd.DataFrame(columns=s.columns, index=s.index)
df.loc[s["reject_pred"].ne(s["reject_obs"]), ["group1", "group2"]] = (
"background: red"
)
df.loc[s["reject_pred"].eq(s["reject_obs"]), ["group1", "group2"]] = (
"background: green"
)
df.loc[s.reject_pred, ["reject_pred"]] = "background: green"
df.loc[~s.reject_pred, ["reject_pred"]] = "background: red"
df.loc[s.reject_obs, ["reject_obs"]] = "background: green"
df.loc[~s.reject_obs, ["reject_obs"]] = "background: red"
return df
def get_tuckey_df(endog, groups, df_genotypes) -> pd.DataFrame:
tukey = pairwise_tukeyhsd(endog=endog, groups=groups)
df_tuc = pd.DataFrame(tukey._results_table)
df_tuc.columns = [str(c) for c in df_tuc.iloc[0]]
ret = (
df_tuc.drop(df_tuc.index[0])
.assign(group1=lambda s: s.group1.astype(str))
.assign(group2=lambda s: s.group2.astype(str))
.assign(reject=lambda s: s.reject.astype(str) == "True")
)
ret["p-adj"] = tukey.pvalues
if df_genotypes is None:
return ret
else:
return (
ret.merge(right=df_genotypes, how="left", left_on="group1", right_on="gen")
.drop(["gen"], axis=1)
.rename(columns={"rpvloci": "group1_rpvloci"})
.merge(right=df_genotypes, how="left", left_on="group2", right_on="gen")
.drop(["gen"], axis=1)
.rename(columns={"rpvloci": "group2_rpvloci"})
)
def get_tuckey_compare(df, df_genotypes=None, groups: str = "gen"):
merge_on = (
["group1", "group2"]
if df_genotypes is None
else ["group1", "group2", "group1_rpvloci", "group2_rpvloci"]
)
df_poiv = get_tuckey_df(df.p_oiv, df[groups], df_genotypes=df_genotypes)
df_oiv = get_tuckey_df(df.oiv, df[groups], df_genotypes=df_genotypes)
df = pd.merge(left=df_poiv, right=df_oiv, on=merge_on, suffixes=["_pred", "_obs"])
return df
def df_tukey_cmp_plot(df, groups):
df_tukey = (
get_tuckey_compare(df=df, groups=groups, df_genotypes=None)
.assign(pair_groups=lambda s: s.group1 + "\n" + s.group2)
.sort_values("p-adj_obs")
)
df_tukey_reject = df_tukey[df_tukey.reject_obs & df_tukey.reject_pred]
df_tukey_accept = df_tukey[~df_tukey.reject_obs & ~df_tukey.reject_pred]
df_tukey_diverge = df_tukey[df_tukey.reject_obs != df_tukey.reject_pred]
fig = Figure(figsize=(20, 6))
ax_reject, ax_diverge, ax_accept = fig.subplots(
1,
3,
gridspec_kw={
"width_ratios": [
len(df_tukey_reject),
len(df_tukey_diverge),
len(df_tukey_accept),
]
},
sharey=True,
)
for ax in [ax_reject, ax_accept, ax_diverge]:
ax.set_yticks(ticks=stars, labels=["*", "**", "***", "****"])
ax.grid(False)
ax_reject.set_title("Rejected")
ax_diverge.set_title("Conflict")
ax_accept.set_title("Accepted")
for ax, df in zip(
[ax_reject, ax_accept, ax_diverge],
[df_tukey_reject, df_tukey_accept, df_tukey_diverge],
):
for star in stars:
ax.axhline(y=star, linestyle="-", color="black", alpha=0.5)
ax.bar(
x=df["pair_groups"],
height=-np.log(df["p-adj_pred"]),
width=-0.4,
align="edge",
color="green",
label="predictions",
)
ax.bar(
x=df["pair_groups"],
height=-np.log(df["p-adj_obs"]),
width=0.4,
align="edge",
color="blue",
label="scorings",
)
ax.margins(0.01)
ax_accept.legend(loc="upper left", bbox_to_anchor=[0, 1], ncols=1, fancybox=True)
ax_reject.set_ylabel("-log(p value)")
ax_reject.tick_params(axis="y", which="major", labelsize=16)
fig.subplots_adjust(wspace=0.05, hspace=0.05)
return fig
def plot_patches(df, diff_only: bool = True):
if diff_only is True:
df = df[(df.oiv != df.p_oiv)]
df = df.assign(diff=lambda s: s.oiv != s.p_oiv).sort_values(
["diff", "oiv", "p_oiv"]
)
return pn.GridBox(
*[
pn.Column(
pn.pane.Markdown(f"### {row.file_name}|{row.oiv}->p{row.p_oiv}"),
pn.pane.Image(
object=ci.enhance_pil_image(
image=ci.load_image(
file_name=row.file_name,
path_to_images=cc.path_to_leaf_patches,
),
brightness=1.5,
)
),
)
for _, row in df.iterrows()
],
ncols=len(df),
)