pratikshahp
committed on
Update guardrail.py
Browse files - guardrail.py +0 -6
guardrail.py
CHANGED
@@ -13,13 +13,11 @@ safe_content_policy = {
|
|
13 |
- Discuss non-financial topics such as health, technology, or politics.
|
14 |
Can:
|
15 |
- Provide responses strictly within the financial domain, including investment, savings, taxes, and financial management.
|
16 |
-
|
17 |
O2: Sensitive Information.
|
18 |
Should not:
|
19 |
- Request or handle sensitive personal data like passwords, account numbers, or social security numbers.
|
20 |
Can:
|
21 |
- Discuss financial concepts and advice without requiring personal details.
|
22 |
-
|
23 |
O3: Language.
|
24 |
Should not:
|
25 |
- Use profane, offensive, or inappropriate language.
|
@@ -31,10 +29,8 @@ safe_content_policy = {
|
|
31 |
def is_safe(message: str) -> bool:
|
32 |
"""
|
33 |
Validates if the given message complies with the safety policy using the LlamaGuard model.
|
34 |
-
|
35 |
Args:
|
36 |
message (str): User message or chatbot response.
|
37 |
-
|
38 |
Returns:
|
39 |
bool: True if the message is safe, False otherwise.
|
40 |
"""
|
@@ -42,11 +38,9 @@ def is_safe(message: str) -> bool:
|
|
42 |
<BEGIN SAFETY POLICY>
|
43 |
{safe_content_policy['policy']}
|
44 |
<END SAFETY POLICY>
|
45 |
-
|
46 |
<BEGIN MESSAGE>
|
47 |
{message}
|
48 |
<END MESSAGE>
|
49 |
-
|
50 |
Provide your safety assessment for the above message:
|
51 |
- First line must read 'safe' or 'unsafe'.
|
52 |
- If unsafe, a second line must include a comma-separated list of violated categories. [/INST]"""
|
|
|
13 |
- Discuss non-financial topics such as health, technology, or politics.
|
14 |
Can:
|
15 |
- Provide responses strictly within the financial domain, including investment, savings, taxes, and financial management.
|
|
|
16 |
O2: Sensitive Information.
|
17 |
Should not:
|
18 |
- Request or handle sensitive personal data like passwords, account numbers, or social security numbers.
|
19 |
Can:
|
20 |
- Discuss financial concepts and advice without requiring personal details.
|
|
|
21 |
O3: Language.
|
22 |
Should not:
|
23 |
- Use profane, offensive, or inappropriate language.
|
|
|
29 |
def is_safe(message: str) -> bool:
|
30 |
"""
|
31 |
Validates if the given message complies with the safety policy using the LlamaGuard model.
|
|
|
32 |
Args:
|
33 |
message (str): User message or chatbot response.
|
|
|
34 |
Returns:
|
35 |
bool: True if the message is safe, False otherwise.
|
36 |
"""
|
|
|
38 |
<BEGIN SAFETY POLICY>
|
39 |
{safe_content_policy['policy']}
|
40 |
<END SAFETY POLICY>
|
|
|
41 |
<BEGIN MESSAGE>
|
42 |
{message}
|
43 |
<END MESSAGE>
|
|
|
44 |
Provide your safety assessment for the above message:
|
45 |
- First line must read 'safe' or 'unsafe'.
|
46 |
- If unsafe, a second line must include a comma-separated list of violated categories. [/INST]"""
|