tokenize from the main session
- app.R            +21 -10
- model-session.R  +13 -12
app.R CHANGED

@@ -4,12 +4,13 @@ library(minhub)
 library(magrittr)
 source("model-session.R")
 
+repo <- "EleutherAI/pythia-70m"
 repo <- "stabilityai/stablelm-tuned-alpha-3b"
 repo <- Sys.getenv("MODEL_REPO", unset = repo)
 sess <- model_session$new()
 
 max_n_tokens <- 100
-system_prompt
+system_prompt <- "<|SYSTEM|># StableLM Tuned (Alpha version)
 - StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
 - StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
 - StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
@@ -34,7 +35,7 @@ ui <- page_fillable(
 )
 
 server <- function(input, output, session) {
-
+  idxs <- reactiveVal()
   n_tokens <- reactiveVal(value = 0)
   msg_id <- reactiveVal(value = 0)
 
@@ -46,12 +47,21 @@ server <- function(input, output, session) {
     updateActionButton(inputId = "send", label = "Waiting for model...")
     insert_message(msg_id, as.character(glue::glue("🤗: {input$prompt}")))
 
+    if (is.null(idxs())) {
+      current_idxs <- sess$tok$encode(system_prompt)$ids
+    } else {
+      current_idxs <- idxs()
+    }
+
+    new_idxs <- paste0("<|USER|>", input$prompt, "<|ASSISTANT|>")
+    new_idxs <- sess$tok$encode(new_idxs)$ids
+
     # we modify the prompt to trigger the 'next_token' reactive
-
+    idxs(c(current_idxs, new_idxs))
   })
 
-  next_token <- eventReactive(
-
+  next_token <- eventReactive(idxs(), ignoreInit = TRUE, {
+    idxs() %>%
       sess$generate() %>%
       promises::then(
         onFulfilled = function(x) {x},
@@ -65,17 +75,18 @@ server <- function(input, output, session) {
 
   observeEvent(next_token(), {
     tok <- next_token()
-
     n_tokens(n_tokens() + 1)
    tok %>% promises::then(function(tok) {
+      tok_dec <- sess$tok$decode(tok)
       if (n_tokens() == 1) {
-        insert_message(msg_id, paste0("🤖: ",
+        insert_message(msg_id, paste0("🤖: ", tok_dec), append = FALSE)
       } else {
-        insert_message(msg_id,
+        insert_message(msg_id, tok_dec, append = TRUE)
       }
 
-      if (tok
-
+      if ((!tok %in% c(50278L, 50279L, 50277L, 1L, 0L)) &&
+          n_tokens() < max_n_tokens) {
+        idxs(c(idxs(), tok))
       } else {
         shinyjs::enable("send")
         updateActionButton(inputId = "send", label = "Send")
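What the change buys: the tokenizer now lives in the main R session, so the Shiny process passes integer token ids to the worker instead of raw strings, and can decode each sampled id as it streams back. A minimal sketch of that round trip, assuming only the tok package and a repo that ships a tokenizer.json:

library(tok)

# encode: chat-formatted text -> integer token ids, done in the main session
tokenizer <- tok::tokenizer$from_pretrained("EleutherAI/pythia-70m")
ids <- tokenizer$encode("<|USER|>Hello!<|ASSISTANT|>")$ids

# ids is a plain integer vector: cheap to ship to the worker and to grow
# one sampled token at a time, as app.R now does with idxs()
str(ids)

# decode: ids -> text, used to render each streamed token in the chat UI
tokenizer$decode(ids)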
model-session.R CHANGED

@@ -13,37 +13,38 @@ model_session <- R6::R6Class(
      cat("Model is already loaded.", "\n")
      return(self$task_q$push(function() "done"))
    }
+    # the tokenizer doesn't need to live in the remote session.
+    self$tok <- tok::tokenizer$from_pretrained(repo)
    self$task_q <- callq::task_q$new(num_workers = 1)
    self$task_q$push(args = list(repo = repo), function(repo) {
      library(torch)
      library(zeallot)
      library(minhub)
-
+      device <- if (cuda_is_available()) "cuda" else "cpu"
      model <<- minhub::gptneox_from_pretrained(repo)
      model$eval()
-
-
-
-
+      if (device == "cuda") {
+        model$to(dtype=torch_half())
+        model$to(device=device)
+      } else {
      model$to(dtype = torch_float())
-
-      tok <<- tok::tokenizer$from_pretrained(repo)
+      }
      "done"
    })
  },
-  generate = function(
+  generate = function(idx) {
    if (is.null(self$task_q)) {
      cat("Model is not loaded, error.", "\n")
      return(self$task_q$push(function() stop("Model is not loaded")))
    }
    args <- list(
-
+      idx = idx,
      temperature = self$temperature,
      top_k = self$top_k
    )
-    self$task_q$push(args = args, function(
+    self$task_q$push(args = args, function(idx, temperature, top_k) {
      device <- if (cuda_is_available()) "cuda" else "cpu"
-
+      idx <- torch_tensor(idx, device=device)$view(c(1, -1))
      with_no_grad({
        logits <- model(idx + 1L)$to(dtype="float", device="cpu")
      })
@@ -52,7 +53,7 @@ model_session <- R6::R6Class(
      logits <- torch_full_like(logits, -1e7)$scatter_(-1, ind, prob)
      logits <- nnf_softmax(logits, dim = -1)
      id_next <- torch::torch_multinomial(logits, num_samples = 1)$cpu() - 1L
-
+      as.integer(id_next)
    })
  }
)
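Taken together, model_session now splits cleanly: self$tok lives in the main session while the torch model stays inside the callq worker, and generate() only ever exchanges integer vectors with it. A hypothetical end-to-end sketch (load_model is an assumed name for the loading method, which sits above the first hunk and is not shown in this diff):

library(magrittr)
source("model-session.R")

sess <- model_session$new()

# boots a one-worker callq task queue and loads the weights inside it;
# like generate(), it returns a promise (method name assumed, see above)
sess$load_model("EleutherAI/pythia-70m")

# tokenize in the main session, exactly as app.R does on each send
idx <- sess$tok$encode("<|USER|>Hello!<|ASSISTANT|>")$ids

# one forward pass in the worker; resolves to a single sampled token id
# that the app decodes and appends back onto idxs()
sess$generate(idx) %>%
  promises::then(function(tok) cat("sampled id:", tok, "\n"))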