"""Gradio demo that scores a DNA sequence as promoter / not-promoter.

The sequence is split into overlapping k-mers, tokenized with the
"armheb/DNA_bert_6" tokenizer and classified by the
"simecek/promoters_demo" sequence-classification model.
"""

from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax
import gradio as gr
import torch

# Length of the overlapping substrings fed to the tokenizer.
_KMER_SIZE = 6

tokenizer = AutoTokenizer.from_pretrained("armheb/DNA_bert_6")
model2 = AutoModelForSequenceClassification.from_pretrained("simecek/promoters_demo")

# Upper bound on the number of tokens (special tokens included) passed to the
# model. Taken from the model config when it exposes one; 512 is the usual
# BERT limit and is only a fallback -- TODO confirm for this checkpoint.
_MAX_TOKENS = getattr(model2.config, "max_position_embeddings", 512)


def kmers(s, k=_KMER_SIZE):
    """Return all overlapping length-``k`` substrings of ``s``, left to right.

    Returns an empty list when ``s`` is shorter than ``k``.
    """
    return [s[i:i + k] for i in range(len(s) - k + 1)]


def tokenization(x):
    """Tokenize the sequence stored under ``x["seq"]``.

    The sequence is split into k-mers, joined with single spaces and encoded
    as PyTorch tensors. Inputs that would exceed ``_MAX_TOKENS`` tokens are
    truncated so that over-long sequences do not overflow the model's
    position embeddings.
    """
    return tokenizer(
        " ".join(kmers(x["seq"])),
        return_tensors="pt",
        truncation=True,
        max_length=_MAX_TOKENS,
    )


categories = ["not-promoter", "promoter"]


def is_promoter(DNAseq):
    """Classify ``DNAseq`` and return ``{category: probability}``.

    Whitespace (including line breaks pasted into the multi-line text box) is
    removed and the sequence is upper-cased before k-mer splitting.

    Raises:
        ValueError: if fewer than ``_KMER_SIZE`` characters remain after
            whitespace removal, since no k-mer can be formed from them.
    """
    # Presumably the tokenizer vocabulary holds upper-case k-mers, so
    # lower-case input would otherwise map to unknown tokens -- TODO confirm.
    sequence = "".join((DNAseq or "").split()).upper()
    if len(sequence) < _KMER_SIZE:
        raise ValueError(
            f"DNA sequence must contain at least {_KMER_SIZE} characters, "
            f"got {len(sequence)}."
        )

    encoded = tokenization({"seq": sequence})
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model2(**encoded)["logits"].numpy()

    # One row per input sequence; only a single sequence is scored here.
    probs = softmax(logits, axis=1)[0]
    return dict(zip(categories, probs.tolist()))


# Prefer the top-level component classes; fall back to the legacy
# ``gr.inputs`` / ``gr.outputs`` namespaces on Gradio releases that lack them.
_Textbox = getattr(gr, "Textbox", None) or gr.inputs.Textbox
_Label = getattr(gr, "Label", None) or gr.outputs.Label

text = _Textbox(placeholder="Input DNA sequence", lines=5)
label = _Label(label="Is it a promoter?")
intf = gr.Interface(fn=is_promoter, inputs=text, outputs=label)
intf.launch()