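"""Gradio demo for PECoRe.

Identifies context-sensitive tokens in a model's generation and attributes them
to influential context tokens using Inseq's ``attribute-context`` command.
"""
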
import json
import os
import gradio as gr
import spaces
from contents import (
citation,
description,
examples,
how_it_works_intro,
cti_explanation,
cci_explanation,
how_to_use,
example_explanation,
subtitle,
title,
powered_by,
support,
)
from gradio_highlightedtextbox import HighlightedTextbox
from presets import (
set_chatml_preset,
set_cora_preset,
set_default_preset,
set_mmt_preset,
set_towerinstruct_preset,
set_zephyr_preset,
set_gemma_preset,
set_mistral_instruct_preset,
)
from style import custom_css
from utils import get_formatted_attribute_context_results
from inseq import list_feature_attribution_methods, list_step_functions
from inseq.commands.attribute_context.attribute_context import (
AttributeContextArgs,
attribute_context_with_model,
)
from inseq.models import HuggingfaceModel
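

# Module-level cache: the last loaded model is kept so repeated runs with the
# same checkpoint skip reloading.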
loaded_model: HuggingfaceModel | None = None


@spaces.GPU()
def pecore(
input_current_text: str,
input_context_text: str,
output_current_text: str,
output_context_text: str,
model_name_or_path: str,
attribution_method: str,
attributed_fn: str | None,
context_sensitivity_metric: str,
context_sensitivity_std_threshold: float,
context_sensitivity_topk: int,
attribution_std_threshold: float,
attribution_topk: int,
input_template: str,
output_template: str,
contextless_input_template: str,
contextless_output_template: str,
special_tokens_to_keep: str | list[str] | None,
decoder_input_output_separator: str,
model_kwargs: str,
tokenizer_kwargs: str,
generation_kwargs: str,
attribution_kwargs: str,
):
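    """Run PECoRe end-to-end: load the model if needed, attribute context
    reliance for the given inputs, and return highlighted results along with
    download buttons for the JSON and HTML outputs."""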
global loaded_model
if "{context}" in output_template and not output_context_text:
raise gr.Error(
"Parameter 'Generation context' must be set when including {context} in the output template."
)
if loaded_model is None or model_name_or_path != loaded_model.model_name:
gr.Info("Loading model...")
loaded_model = HuggingfaceModel.load(
model_name_or_path,
attribution_method,
model_kwargs=json.loads(model_kwargs),
tokenizer_kwargs=json.loads(tokenizer_kwargs),
)
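    # Forward optional arguments only when they are set, so that
    # AttributeContextArgs falls back to its own defaults otherwise.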
kwargs = {}
if context_sensitivity_topk > 0:
kwargs["context_sensitivity_topk"] = context_sensitivity_topk
if attribution_topk > 0:
kwargs["attribution_topk"] = attribution_topk
if input_context_text:
kwargs["input_context_text"] = input_context_text
if output_context_text:
kwargs["output_context_text"] = output_context_text
if output_current_text:
kwargs["output_current_text"] = output_current_text
if decoder_input_output_separator:
kwargs["decoder_input_output_separator"] = decoder_input_output_separator
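    # Full configuration for Inseq's attribute-context command; results are
    # saved to JSON and HTML files exposed via the download buttons.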
pecore_args = AttributeContextArgs(
show_intermediate_outputs=False,
save_path=os.path.join(os.path.dirname(__file__), "outputs/output.json"),
add_output_info=True,
viz_path=os.path.join(os.path.dirname(__file__), "outputs/output.html"),
show_viz=False,
model_name_or_path=model_name_or_path,
attribution_method=attribution_method,
attributed_fn=attributed_fn,
attribution_selectors=None,
attribution_aggregators=None,
normalize_attributions=True,
model_kwargs=json.loads(model_kwargs),
tokenizer_kwargs=json.loads(tokenizer_kwargs),
generation_kwargs=json.loads(generation_kwargs),
attribution_kwargs=json.loads(attribution_kwargs),
context_sensitivity_metric=context_sensitivity_metric,
prompt_user_for_contextless_output_next_tokens=False,
special_tokens_to_keep=special_tokens_to_keep,
context_sensitivity_std_threshold=context_sensitivity_std_threshold,
attribution_std_threshold=attribution_std_threshold,
input_current_text=input_current_text,
input_template=input_template,
output_template=output_template,
contextless_input_current_text=contextless_input_template,
contextless_output_current_text=contextless_output_template,
handle_output_context_strategy="pre",
**kwargs,
)
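    # Run the attribution with the cached model and format the results as
    # (text, label) tuples for the highlighted textbox.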
out = attribute_context_with_model(pecore_args, loaded_model)
tuples = get_formatted_attribute_context_results(loaded_model, out.info, out)
if not tuples:
        msg = (
            f"Output: {out.output_current}\n"
            "Warning: PECoRe did not identify any context-sensitive generated tokens.\n"
            "Try relaxing the Results Selection parameters (e.g. set Context sensitivity threshold to 0)."
        )
tuples = [(msg, None)]
return [
tuples,
gr.DownloadButton(
label="πŸ“‚ Download output",
value=os.path.join(os.path.dirname(__file__), "outputs/output.json"),
visible=True,
),
gr.DownloadButton(
label="πŸ” Download HTML",
value=os.path.join(os.path.dirname(__file__), "outputs/output.html"),
visible=True,
)
    ]


@spaces.GPU()
def preload_model(
model_name_or_path: str,
attribution_method: str,
model_kwargs: str,
tokenizer_kwargs: str,
):
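    """Load and cache the model ahead of time so that the first attribution
    run does not pay the model loading cost."""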
global loaded_model
if loaded_model is None or model_name_or_path != loaded_model.model_name:
gr.Info("Loading model...")
loaded_model = HuggingfaceModel.load(
model_name_or_path,
attribution_method,
model_kwargs=json.loads(model_kwargs),
tokenizer_kwargs=json.loads(tokenizer_kwargs),
)
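

# Demo UI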
with gr.Blocks(css=custom_css) as demo:
with gr.Row():
with gr.Column(scale=0.1, min_width=100):
            gr.HTML('<img src="file/img/pecore_logo_white_contour.png" width="100" />')
with gr.Column(scale=0.8):
gr.Markdown(title)
gr.Markdown(subtitle)
with gr.Column(scale=0.1, min_width=100):
            gr.HTML('<img src="file/img/pecore_logo_white_contour.png" width="100" />')
gr.Markdown(description)
with gr.Tab("πŸ‘ Demo"):
with gr.Row():
with gr.Column():
input_context_text = gr.Textbox(
label="Input context", lines=3, placeholder="Your input context..."
)
input_current_text = gr.Textbox(
label="Input query", placeholder="Your input query..."
)
attribute_input_button = gr.Button("Run PECoRe", variant="primary")
with gr.Column():
pecore_output_highlights = HighlightedTextbox(
value=[
("This output will contain ", None),
("context sensitive", "Context sensitive"),
(" generated tokens and ", None),
("influential context", "Influential context"),
(" tokens.", None),
],
color_map={
"Context sensitive": "#5fb77d",
"Influential context": "#80ace8",
},
show_legend=True,
label="PECoRe Output",
combine_adjacent=True,
interactive=False,
)
with gr.Row(equal_height=True):
download_output_file_button = gr.DownloadButton(
"πŸ“‚ Download output",
visible=False,
)
download_output_html_button = gr.DownloadButton(
"πŸ” Download HTML",
visible=False,
value=os.path.join(
os.path.dirname(__file__), "outputs/output.html"
),
)
preset_comment = gr.Markdown(
"<i>The <a href='https://huggingface.co/gsarti/cora_mgen' target='_blank'>CORA Multilingual QA</a> model by <a href='https://openreview.net/forum?id=e8blYRui3j' target='_blank'>Asai et al. (2021)</a> is set as default and can be used with the examples below. Explore other presets in the βš™οΈ Parameters tab.</i>"
)
attribute_input_examples = gr.Examples(
examples,
inputs=[input_current_text, input_context_text],
outputs=pecore_output_highlights,
examples_per_page=1,
)
with gr.Tab("βš™οΈ Parameters") as params_tab:
gr.Markdown(
"## ✨ Presets\nSelect a preset to load the selected model and its default parameters (e.g. prompt template, special tokens, etc.) into the fields below.<br>⚠️ **This will overwrite existing parameters. If you intend to use large models that could crash the demo, please clone this Space and allocate appropriate resources for them to run comfortably.**"
)
        check_enable_large_models = gr.Checkbox(False, label="I understand, enable large model presets")
with gr.Row(equal_height=True):
with gr.Column():
default_preset = gr.Button("Default", variant="secondary")
gr.Markdown(
"Default preset using templates without special tokens or parameters.\nCan be used with most decoder-only and encoder-decoder models."
)
with gr.Column():
cora_preset = gr.Button("CORA mQA", variant="secondary")
gr.Markdown(
"Preset for the <a href='https://huggingface.co/gsarti/cora_mgen' target='_blank'>CORA Multilingual QA</a> model.\nUses special templates for inputs."
)
with gr.Column():
zephyr_preset = gr.Button("Zephyr Template", variant="secondary", interactive=False)
gr.Markdown(
"Preset for models using the <a href='https://huggingface.co/stabilityai/stablelm-2-zephyr-1_6b' target='_blank'>StableLM 2 Zephyr conversational template</a>.\nUses <code><|system|></code>, <code><|user|></code> and <code><|assistant|></code> special tokens."
)
with gr.Row(equal_height=True):
with gr.Column(scale=1):
multilingual_mt_template = gr.Button(
"Multilingual MT", variant="secondary"
)
gr.Markdown(
"Preset for multilingual MT models such as <a href='https://huggingface.co/facebook/nllb-200-distilled-600M' target='_blank'>NLLB</a> and <a href='https://huggingface.co/facebook/mbart-large-50-many-to-many-mmt' target='_blank'>mBART</a> using language tags."
)
with gr.Column(scale=1):
chatml_template = gr.Button("Qwen ChatML", variant="secondary")
gr.Markdown(
"Preset for models using the <a href='https://github.com/MicrosoftDocs/azure-docs/blob/main/articles/ai-services/openai/includes/chat-markup-language.md' target='_blank'>ChatML conversational template</a>.\nUses <code><|im_start|></code>, <code><|im_end|></code> special tokens."
)
with gr.Column(scale=1):
towerinstruct_template = gr.Button(
"Unbabel TowerInstruct", variant="secondary", interactive=False
)
gr.Markdown(
"Preset for models using the <a href='https://huggingface.co/Unbabel/TowerInstruct-7B-v0.1' target='_blank'>Unbabel TowerInstruct</a> conversational template.\nUses <code><|im_start|></code>, <code><|im_end|></code> special tokens."
)
with gr.Row(equal_height=True):
with gr.Column(scale=1):
gemma_template = gr.Button(
"Gemma Chat Template", variant="secondary", interactive=False
)
gr.Markdown(
"Preset for <a href='https://huggingface.co/google/gemma-2b-it' target='_blank'>Gemma</a> instruction-tuned models."
)
with gr.Column(scale=1):
mistral_instruct_template = gr.Button(
"Mistral Instruct", variant="secondary", interactive=False
)
gr.Markdown(
"Preset for models using the <a href='https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2' target='_blank'>Mistral Instruct template</a>.\nUses <code>[INST]...[/INST]</code> special tokens."
)
gr.Markdown("## βš™οΈ PECoRe Parameters")
with gr.Row(equal_height=True):
with gr.Column():
model_name_or_path = gr.Textbox(
value="gsarti/cora_mgen",
label="Model",
info="Hugging Face Hub identifier of the model to analyze with PECoRe.",
interactive=True,
)
load_model_button = gr.Button(
"Load model",
variant="secondary",
)
context_sensitivity_metric = gr.Dropdown(
value="kl_divergence",
label="Context sensitivity metric",
                    info="Metric used to measure the context sensitivity of generated tokens.",
choices=list_step_functions(),
interactive=True,
)
attribution_method = gr.Dropdown(
value="saliency",
label="Attribution method",
                    info="Attribution method used to identify relevant context tokens.",
choices=list_feature_attribution_methods(),
interactive=True,
)
attributed_fn = gr.Dropdown(
value="contrast_prob_diff",
label="Attributed function",
info="Function of model logits to use as target for the attribution method.",
choices=list_step_functions(),
interactive=True,
)
gr.Markdown("#### Results Selection Parameters")
with gr.Row(equal_height=True):
context_sensitivity_std_threshold = gr.Number(
value=0.0,
label="Context sensitivity threshold",
                info="Select N to keep context-sensitive tokens with scores above mean + N * std. 0 = above the mean.",
precision=1,
minimum=0.0,
maximum=5.0,
step=0.5,
interactive=True,
)
context_sensitivity_topk = gr.Number(
value=0,
label="Context sensitivity top-k",
info="Select N to keep top N context sensitive tokens. 0 = keep all.",
interactive=True,
precision=0,
minimum=0,
maximum=10,
)
attribution_std_threshold = gr.Number(
value=1.0,
label="Attribution threshold",
                info="Select N to keep attributed tokens with scores above mean + N * std. 0 = above the mean.",
precision=1,
minimum=0.0,
maximum=5.0,
step=0.5,
interactive=True,
)
attribution_topk = gr.Number(
value=5,
label="Attribution top-k",
info="Select N to keep top N attributed tokens in the context. 0 = keep all.",
interactive=True,
precision=0,
minimum=0,
maximum=100,
)
gr.Markdown("#### Text Format Parameters")
with gr.Row(equal_height=True):
input_template = gr.Textbox(
value="<Q>:{current} <P>:{context}",
label="Contextual input template",
info="Template to format the input for the model. Use {current} and {context} placeholders for Input Query and Input Context, respectively.",
interactive=True,
)
output_template = gr.Textbox(
value="{current}",
label="Contextual output template",
info="Template to format the output from the model. Use {current} and {context} placeholders for Generation Output and Generation Context, respectively.",
interactive=True,
)
contextless_input_template = gr.Textbox(
value="<Q>:{current}",
label="Contextless input template",
info="Template to format the input query in the non-contextual setting. Use {current} placeholder for Input Query.",
interactive=True,
)
contextless_output_template = gr.Textbox(
value="{current}",
label="Contextless output template",
                info="Template to format the output in the non-contextual setting. Use {current} placeholder for Generation Output.",
interactive=True,
)
with gr.Row(equal_height=True):
special_tokens_to_keep = gr.Dropdown(
label="Special tokens to keep",
info="Special tokens to keep in the attribution. If empty, all special tokens are ignored.",
value=None,
multiselect=True,
allow_custom_value=True,
)
decoder_input_output_separator = gr.Textbox(
label="Decoder input/output separator",
info="Separator to use between input and output in the decoder input.",
value="",
interactive=True,
lines=1,
)
gr.Markdown("## βš™οΈ Generation Parameters")
with gr.Row(equal_height=True):
with gr.Column(scale=0.5):
gr.Markdown(
"The following arguments can be used to control generation parameters and force specific model outputs."
)
with gr.Column(scale=1):
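                # Arbitrary Hugging Face `generate()` kwargs, e.g. {"max_new_tokens": 50}.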
generation_kwargs = gr.Code(
value="{}",
language="json",
label="Generation kwargs (JSON)",
interactive=True,
lines=1,
)
with gr.Row(equal_height=True):
output_current_text = gr.Textbox(
label="Generation output",
                info="Output to force-decode during generation. If blank, the model generates freely.",
interactive=True,
)
output_context_text = gr.Textbox(
label="Generation context",
                info="If specified, this context is used as a starting point for generation. Useful e.g. for chain-of-thought reasoning.",
interactive=True,
)
gr.Markdown("## βš™οΈ Other Parameters")
with gr.Row(equal_height=True):
with gr.Column():
                gr.Markdown(
                    "The following arguments are passed when initializing the Hugging Face model and tokenizer, and to the `inseq_model.attribute` method."
                )
with gr.Column():
model_kwargs = gr.Code(
value="{}",
language="json",
label="Model kwargs (JSON)",
interactive=True,
lines=1,
min_width=160,
)
with gr.Column():
tokenizer_kwargs = gr.Code(
value="{}",
language="json",
label="Tokenizer kwargs (JSON)",
interactive=True,
lines=1,
)
with gr.Column():
attribution_kwargs = gr.Code(
value='{\n\t"logprob": true\n}',
language="json",
label="Attribution kwargs (JSON)",
interactive=True,
lines=1,
)
with gr.Tab("πŸ” How Does It Work?"):
gr.Markdown(how_it_works_intro)
with gr.Row(equal_height=True):
with gr.Column(scale=0.60):
gr.Markdown(cti_explanation)
with gr.Column(scale=0.30):
                gr.HTML('<img src="file/img/cti_white_outline.png" width="100%" />')
with gr.Row(equal_height=True):
with gr.Column(scale=0.35):
                gr.HTML('<img src="file/img/cci_white_outline.png" width="100%" />')
with gr.Column(scale=0.65):
gr.Markdown(cci_explanation)
with gr.Tab("πŸ”§ Usage Guide"):
gr.Markdown(how_to_use)
        gr.HTML('<img src="file/img/pecore_ui_output_example.png" width="100%" />')
gr.Markdown(example_explanation)
with gr.Tab("πŸ“š Citing PECoRe"):
gr.Markdown(citation)
with gr.Row(elem_classes="footer-container"):
gr.Markdown(powered_by)
gr.Markdown(support)
# Main logic
load_model_args = [
model_name_or_path,
attribution_method,
model_kwargs,
tokenizer_kwargs,
]
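    # Hide stale download buttons before running PECoRe; fresh ones are
    # returned by the run itself.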
attribute_input_button.click(
        lambda: [gr.DownloadButton(visible=False), gr.DownloadButton(visible=False)],
inputs=[],
outputs=[download_output_file_button, download_output_html_button],
).then(
pecore,
inputs=[
input_current_text,
input_context_text,
output_current_text,
output_context_text,
model_name_or_path,
attribution_method,
attributed_fn,
context_sensitivity_metric,
context_sensitivity_std_threshold,
context_sensitivity_topk,
attribution_std_threshold,
attribution_topk,
input_template,
output_template,
contextless_input_template,
contextless_output_template,
special_tokens_to_keep,
decoder_input_output_separator,
model_kwargs,
tokenizer_kwargs,
generation_kwargs,
attribution_kwargs,
],
outputs=[
pecore_output_highlights,
download_output_file_button,
download_output_html_button,
],
)
load_model_event = load_model_button.click(
preload_model,
inputs=load_model_args,
outputs=[],
)
# Preset params
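    # Large-model preset buttons stay disabled until the user ticks the
    # confirmation checkbox.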
check_enable_large_models.input(
lambda checkbox, *buttons: [gr.Button(interactive=checkbox) for _ in buttons],
inputs=[check_enable_large_models, zephyr_preset, towerinstruct_template, gemma_template, mistral_instruct_template],
outputs=[zephyr_preset, towerinstruct_template, gemma_template, mistral_instruct_template],
)
outputs_to_reset = [
model_name_or_path,
input_template,
output_template,
contextless_input_template,
contextless_output_template,
special_tokens_to_keep,
decoder_input_output_separator,
model_kwargs,
tokenizer_kwargs,
generation_kwargs,
attribution_kwargs,
]
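    # Every preset first restores the defaults, then applies its own overrides
    # and preloads the selected model, cancelling any pending load.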
reset_kwargs = {
"fn": set_default_preset,
"inputs": None,
"outputs": outputs_to_reset,
}
# Presets
default_preset.click(**reset_kwargs).success(preload_model, inputs=load_model_args, cancels=load_model_event)
cora_preset.click(**reset_kwargs).then(
set_cora_preset,
outputs=[model_name_or_path, input_template, contextless_input_template],
).success(preload_model, inputs=load_model_args, cancels=load_model_event)
zephyr_preset.click(**reset_kwargs).then(
set_zephyr_preset,
outputs=[
model_name_or_path,
input_template,
contextless_input_template,
decoder_input_output_separator,
special_tokens_to_keep,
],
).success(preload_model, inputs=load_model_args, cancels=load_model_event)
multilingual_mt_template.click(**reset_kwargs).then(
set_mmt_preset,
outputs=[model_name_or_path, input_template, output_template, tokenizer_kwargs],
).success(preload_model, inputs=load_model_args, cancels=load_model_event)
chatml_template.click(**reset_kwargs).then(
set_chatml_preset,
outputs=[
model_name_or_path,
input_template,
contextless_input_template,
decoder_input_output_separator,
special_tokens_to_keep,
],
).success(preload_model, inputs=load_model_args, cancels=load_model_event)
towerinstruct_template.click(**reset_kwargs).then(
set_towerinstruct_preset,
outputs=[
model_name_or_path,
input_template,
contextless_input_template,
decoder_input_output_separator,
special_tokens_to_keep,
],
).success(preload_model, inputs=load_model_args, cancels=load_model_event)
gemma_template.click(**reset_kwargs).then(
set_gemma_preset,
outputs=[
model_name_or_path,
input_template,
contextless_input_template,
decoder_input_output_separator,
special_tokens_to_keep,
],
).success(preload_model, inputs=load_model_args, cancels=load_model_event)
mistral_instruct_template.click(**reset_kwargs).then(
set_mistral_instruct_preset,
outputs=[
model_name_or_path,
input_template,
contextless_input_template,
decoder_input_output_separator,
],
).success(preload_model, inputs=load_model_args, cancels=load_model_event)
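
# Serve the generated outputs and bundled images alongside the app.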
demo.launch(allowed_paths=["outputs/", "img/"])