import gradio as gr
import requests
import re
import html
import traceback
import logging

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

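# vLLM exposes an OpenAI-compatible /v1/completions endpoint; adjust the host
# and port below to match your deployment.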
VLLM_URL = "http://localhost:6002/v1/completions"
MODEL = "lora"

def flexible_parse(text):
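    """Extract (tag, content) pairs from loosely XML-tagged model output.

    A matched pair like <tag>...</tag> yields (tag, content); an opening tag
    with no matching close yields (tag, ''). Content may span multiple lines;
    nested occurrences of the same tag are not handled. For example,
    '<a>hi</a><b>' parses to [('a', 'hi'), ('b', '')].
    """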
    pattern = r'<(\w+)(?:\s+[^>]*)?>((?:(?!<\1).)*?)</\1>|<(\w+)(?:\s+[^>]*)?>'
    result = []
    
    for match in re.finditer(pattern, text, re.DOTALL):  # DOTALL: content may contain newlines
        tag, content, single_tag = match.groups()
        if single_tag:
            result.append((single_tag, ''))
        elif tag:
            result.append((tag, content.strip() if content else ''))
    
    return result

def format_as_collapsible_markdown(parsed_content):
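    """Render parsed (tag, content) pairs as collapsible HTML <details> blocks.

    Tag names and content are HTML-escaped so model output cannot inject
    markup; a tag with no content is rendered as a bold label only.
    """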
    markdown = ""
    for tag, content in parsed_content:
        if content:
            markdown += f'<details>\n<summary><strong>{html.escape(tag)}</strong></summary>\n\n{html.escape(content)}\n\n</details>\n\n'
        else:
            markdown += f'<strong>{html.escape(tag)}</strong>\n\n'
    return markdown

def get_completion(title: str, prompt: str):
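    """Request a completion from the vLLM server.

    The title and prompt are wrapped in the <title>/<content> template the
    LoRA adapter is presumed to have been trained on. Returns the generated
    text, or a human-readable error string on failure.
    """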
    full_prompt = f"<title>{title}</title>\n<content>\n{prompt}\n</content>"
    try:
        logging.info(f"Sending request to VLLM server: {VLLM_URL}")
        response = requests.post(
            VLLM_URL,
            json={
                "prompt": full_prompt,
                "max_tokens": 6000,
                "temperature": 1,
                "model": MODEL
            },
            timeout=300  # requests timeouts are in seconds, not milliseconds
        )
        response.raise_for_status()  # Raise an exception for bad status codes
        logging.info("Successfully received response from VLLM server")
        return response.json()["choices"][0]["text"]
    except requests.exceptions.RequestException as e:
        logging.error(f"Error connecting to VLLM server: {str(e)}")
        return f"Error connecting to VLLM server: {str(e)}"
    except Exception as e:
        logging.error(f"Unexpected error in get_completion: {str(e)}")
        return f"Unexpected error: {str(e)}\n{traceback.format_exc()}"

def gradio_interface(title, prompt):
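    """Gradio handler: fetch a completion, then return the raw response and a
    collapsible view of its parsed tag structure as one Markdown string.
    """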
    try:
        logging.info(f"Received request - Title: {title}, Prompt: {prompt}")
        raw_response = get_completion(title, prompt)
        parsed_content = flexible_parse(raw_response)
        collapsible_view = format_as_collapsible_markdown(parsed_content)
        
        combined_output = f"""
## Raw Response:

```
{raw_response}
```

## Parsed Structure:

{collapsible_view}
"""
        logging.info("Successfully processed request")
        return combined_output
    except Exception as e:
        logging.error(f"Error in gradio_interface: {str(e)}")
        return f"Error in gradio_interface: {str(e)}\n{traceback.format_exc()}"

iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Textbox(label="Title"),
        gr.Textbox(label="Prompt", lines=5)
    ],
    outputs=gr.Markdown(label="Response and Parsed Structure"),
    title="VLLM Completion Client with Raw Response and Collapsible View",
    description=f"Enter a title and prompt to generate a completion using the {MODEL} model. The raw response and a collapsible view of the parsed structure will be displayed."
)

if __name__ == "__main__":
    logging.info("Starting Gradio interface")
    iface.launch()