Spaces: Running on Zero

StevenChen16 committed bccfe43 (parent: c39e972): Update app.py
app.py
CHANGED
@@ -181,85 +181,85 @@ def chat_llama3_8b(message: str,
     Returns:
         str: Generated response with citations if available
     """
-    try:
-        # 1. Get relevant citations from vector store
-        citation = query_vector_store(vector_store, message, k=4, relevance_threshold=0.7)
-
-        # 2. Format conversation history
-        conversation = []
-        for user, assistant in history:
-            conversation.extend([
-                {"role": "user", "content": str(user)},
-                {"role": "assistant", "content": str(assistant)}
-            ])
-
-        # 3. Construct the final prompt
-        final_message = ""
-        if citation:
-            final_message = f"{background_prompt}\nBased on these references:\n{citation}\nPlease answer: {message}"
-        else:
-            final_message = f"{background_prompt}\n{message}"
-
-        conversation.append({"role": "user", "content": final_message})
-
-        # 4. Prepare model inputs
-        input_ids = tokenizer.apply_chat_template(
-            conversation,
-            return_tensors="pt"
-        ).to(model.device)
-
-        # 5. Setup streamer
-        streamer = TextIteratorStreamer(
-            tokenizer,
-            timeout=10.0,
-            skip_prompt=True,
-            skip_special_tokens=True
-        )
-
-        # 6. Configure generation parameters
-        generation_config = {
-            "input_ids": input_ids,
-            "streamer": streamer,
-            "max_new_tokens": max_new_tokens,
-            "do_sample": temperature > 0,
-            "temperature": temperature,
-            "eos_token_id": terminators
-        }
-
-        # 7. Generate in a separate thread
-        thread = Thread(target=model.generate, kwargs=generation_config)
-        thread.start()
-
-        # 8. Stream the output
-        accumulated_text = []
-        final_chunk = False
-
-        for text_chunk in streamer:
-            accumulated_text.append(text_chunk)
-            current_response = "".join(accumulated_text)
-
-            # Check if this is the last chunk
-            try:
-                next_chunk = next(iter(streamer))
-                accumulated_text.append(next_chunk)
-            except (StopIteration, RuntimeError):
-                final_chunk = True
-
-            # Add citations on the final chunk if they exist
-            if final_chunk and citation:
-                formatted_citations = "\n\nReferences:\n" + "\n".join(
-                    f"[{i+1}] {cite.strip()}"
-                    for i, cite in enumerate(citation.split('\n'))
-                    if cite.strip()
-                )
-                current_response += formatted_citations
-
-            yield current_response
-
-    except Exception as e:
-        error_message = f"An error occurred: {str(e)}"
-        print(error_message)  # For logging
-        yield error_message
+    # try:
+    # 1. Get relevant citations from vector store
+    citation = query_vector_store(vector_store, message, k=4, relevance_threshold=0.7)
+
+    # 2. Format conversation history
+    conversation = []
+    for user, assistant in history:
+        conversation.extend([
+            {"role": "user", "content": str(user)},
+            {"role": "assistant", "content": str(assistant)}
+        ])
+
+    # 3. Construct the final prompt
+    final_message = ""
+    if citation:
+        final_message = f"{background_prompt}\nBased on these references:\n{citation}\nPlease answer: {message}"
+    else:
+        final_message = f"{background_prompt}\n{message}"
+
+    conversation.append({"role": "user", "content": final_message})
+
+    # 4. Prepare model inputs
+    input_ids = tokenizer.apply_chat_template(
+        conversation,
+        return_tensors="pt"
+    ).to(model.device)
+
+    # 5. Setup streamer
+    streamer = TextIteratorStreamer(
+        tokenizer,
+        timeout=10.0,
+        skip_prompt=True,
+        skip_special_tokens=True
+    )
+
+    # 6. Configure generation parameters
+    generation_config = {
+        "input_ids": input_ids,
+        "streamer": streamer,
+        "max_new_tokens": max_new_tokens,
+        "do_sample": temperature > 0,
+        "temperature": temperature,
+        "eos_token_id": terminators
+    }
+
+    # 7. Generate in a separate thread
+    thread = Thread(target=model.generate, kwargs=generation_config)
+    thread.start()
+
+    # 8. Stream the output
+    accumulated_text = []
+    final_chunk = False
+
+    for text_chunk in streamer:
+        accumulated_text.append(text_chunk)
+        current_response = "".join(accumulated_text)
+
+        # Check if this is the last chunk
+        try:
+            next_chunk = next(iter(streamer))
+            accumulated_text.append(next_chunk)
+        except (StopIteration, RuntimeError):
+            final_chunk = True
+
+        # Add citations on the final chunk if they exist
+        if final_chunk and citation:
+            formatted_citations = "\n\nReferences:\n" + "\n".join(
+                f"[{i+1}] {cite.strip()}"
+                for i, cite in enumerate(citation.split('\n'))
+                if cite.strip()
+            )
+            current_response += formatted_citations
+
+        yield current_response
+
+    # except Exception as e:
+    #     error_message = f"An error occurred: {str(e)}"
+    #     print(error_message)  # For logging
+    #     yield error_message
 
 
 # Gradio block
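Note on step 1: query_vector_store itself is not part of this hunk. From the call site alone (k=4, a relevance_threshold, and a return value that the citation formatter splits on newlines), a plausible shape for such a helper is sketched below, assuming a LangChain-style vector store; this is an illustration, not the Space's actual implementation.

# Hypothetical sketch of query_vector_store, inferred from its call site above.
# Assumes a LangChain-style vector store exposing similarity_search_with_relevance_scores;
# the real helper in this Space may differ.
def query_vector_store(vector_store, query: str, k: int = 4, relevance_threshold: float = 0.7) -> str:
    results = vector_store.similarity_search_with_relevance_scores(query, k=k)
    # Keep only sufficiently relevant passages; relevance scores are normalized to [0, 1].
    passages = [doc.page_content.strip() for doc, score in results if score >= relevance_threshold]
    # The caller splits the result on '\n' to build the numbered "References" list,
    # so return one passage per line (empty string means "no relevant context").
    return "\n".join(passages)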
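Steps 4-8 follow the usual transformers streaming pattern: model.generate runs on a worker thread while the generator drains a TextIteratorStreamer and yields partial text. A minimal, self-contained sketch of that pattern is below; the checkpoint name is an assumption based on the function name chat_llama3_8b, not something this diff confirms.

# Minimal sketch of the Thread + TextIteratorStreamer pattern used in steps 4-8.
# The model id is an assumption (gated checkpoint); any chat-templated causal LM works the same way.
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")

def stream_reply(message: str, max_new_tokens: int = 512, temperature: float = 0.7):
    conversation = [{"role": "user", "content": message}]
    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = {
        "input_ids": input_ids,
        "streamer": streamer,
        "max_new_tokens": max_new_tokens,
        "do_sample": temperature > 0,
        "temperature": temperature,
    }

    # generate() blocks, so it runs on a worker thread while this generator
    # yields the accumulated response as each decoded chunk arrives.
    Thread(target=model.generate, kwargs=generation_kwargs).start()

    accumulated = []
    for chunk in streamer:
        accumulated.append(chunk)
        yield "".join(accumulated)

for partial in stream_reply("What is retrieval-augmented generation?"):
    print(partial)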
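The "# Gradio block" that follows the hunk is unchanged and not shown in this diff. For context, a streaming generator with this shape is typically exposed through gr.ChatInterface; the wiring below is an assumption about how the Space might do it (including the parameter order and slider ranges), not the actual block.

# Hypothetical Gradio wiring; the Space's real "# Gradio block" is outside this diff.
# Assumes chat_llama3_8b accepts (message, history, temperature, max_new_tokens).
import gradio as gr

demo = gr.ChatInterface(
    fn=chat_llama3_8b,
    additional_inputs=[
        gr.Slider(0.0, 1.0, value=0.7, step=0.05, label="Temperature"),
        gr.Slider(128, 2048, value=512, step=64, label="Max new tokens"),
    ],
    title="Llama-3-8B RAG chat",
)

if __name__ == "__main__":
    demo.queue().launch()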