Commit with fix
Browse files- .env.example +8 -0
- HF_README.md +249 -0
- app.py +22 -0
- main.py +729 -0
- requirements.txt +14 -0
- web_app.py +1632 -0
.env.example
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# API Keys - DO NOT commit actual keys to Git
|
| 2 |
+
# Add these as Secrets in your Hugging Face Space settings
|
| 3 |
+
|
| 4 |
+
# Option 1: Nebius API Key (primary)
|
| 5 |
+
NEBIUS_API_KEY=your_nebius_api_key_here
|
| 6 |
+
|
| 7 |
+
# Option 2: OpenAI API Key (fallback)
|
| 8 |
+
# OPENAI_API_KEY=your_openai_api_key_here
|
HF_README.md
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Legal Deed Reviewer
|
| 3 |
+
emoji: ⚖️
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 4.0.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: apache-2.0
|
| 11 |
+
tags:
|
| 12 |
+
- mcp
|
| 13 |
+
- mcp-in-action-productivity
|
| 14 |
+
- legal
|
| 15 |
+
- document-analysis
|
| 16 |
+
- gradio
|
| 17 |
+
- ai
|
| 18 |
+
- legal-tech
|
| 19 |
+
- property
|
| 20 |
+
- deed-analysis
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
# ⚖️ Legal Deed Reviewer
|
| 24 |
+
|
| 25 |
+
**AI-powered legal deed analysis using MCP (Model Context Protocol) servers**
|
| 26 |
+
|
| 27 |
+
Upload property deed documents to receive comprehensive risk assessments, clause-by-clause breakdowns, and plain-language explanations of legal issues.
|
| 28 |
+
|
| 29 |
+
## 🎯 Overview
|
| 30 |
+
|
| 31 |
+
Legal Deed Reviewer is an intelligent system that helps property buyers, landlords, lawyers, and mortgage teams understand the risks and implications in legal deed documents. Built for the **MCP-1st-Birthday Hackathon**, this project demonstrates advanced MCP integration with multi-tool orchestration for legal document analysis.
|
| 32 |
+
|
| 33 |
+
### **What This Tool Does:**
|
| 34 |
+
|
| 35 |
+
✅ **Deed Classification** - Automatically identifies deed type (sale, mortgage, lease, gift, warranty, quitclaim)
|
| 36 |
+
✅ **Metadata Extraction** - Extracts parties, property details, consideration, and jurisdiction
|
| 37 |
+
✅ **Clause Breakdown** - Splits deeds into logical sections and clauses
|
| 38 |
+
✅ **Risk Analysis** - Identifies legal risks with severity levels (LOW/MEDIUM/HIGH)
|
| 39 |
+
✅ **Plain-Language Explanations** - Translates legal jargon into understandable language
|
| 40 |
+
✅ **Actionable Recommendations** - Suggests next steps and areas requiring legal consultation
|
| 41 |
+
|
| 42 |
+
## 🚀 Quick Start
|
| 43 |
+
|
| 44 |
+
1. **Upload a PDF** - Click "Upload Deed (PDF)" and select your property deed document
|
| 45 |
+
2. **Click "Analyze Deed"** - The system will process your document (takes 10-30 seconds)
|
| 46 |
+
3. **Review Results** - Navigate through tabs:
|
| 47 |
+
- **Overview**: Deed metadata and quick stats
|
| 48 |
+
- **Clause Breakdown**: All identified clauses with categorization
|
| 49 |
+
- **Risk Analysis**: Clause-by-clause risk assessment with explanations
|
| 50 |
+
- **Extracted Text**: Full text from the deed
|
| 51 |
+
4. **Download Report** - Get a Markdown report for your records
|
| 52 |
+
|
| 53 |
+
### Sample Deed Included
|
| 54 |
+
|
| 55 |
+
Try the system with our sample: `usa_general_warranty_deed_sample.pdf` (included in the Space)
|
| 56 |
+
|
| 57 |
+
## 🏗️ Architecture
|
| 58 |
+
|
| 59 |
+
### MCP Integration
|
| 60 |
+
|
| 61 |
+
This project uses **5 MCP tools** that work together to provide comprehensive deed analysis:
|
| 62 |
+
|
| 63 |
+
#### **1. PDF Text Extraction**
|
| 64 |
+
```python
|
| 65 |
+
extract_text_from_deed_pdf(pdf_path: str) -> JSON
|
| 66 |
+
```
|
| 67 |
+
- Direct text extraction from PDFs using PyMuPDF
|
| 68 |
+
- OCR fallback for scanned documents
|
| 69 |
+
- Returns full text + page-by-page breakdown
|
| 70 |
+
|
| 71 |
+
#### **2. Clause Splitting**
|
| 72 |
+
```python
|
| 73 |
+
split_deed_into_clauses(text: str) -> JSON
|
| 74 |
+
```
|
| 75 |
+
- Pattern-based clause detection
|
| 76 |
+
- Identifies common deed sections (WITNESSETH, WHEREAS, NOW THEREFORE)
|
| 77 |
+
- Categorizes clause types
|
| 78 |
+
|
| 79 |
+
#### **3. Deed Classification**
|
| 80 |
+
```python
|
| 81 |
+
classify_deed_type(deed_text: str) -> JSON
|
| 82 |
+
```
|
| 83 |
+
- LLM-powered deed type identification
|
| 84 |
+
- Extracts jurisdiction, parties, property details
|
| 85 |
+
- Structured JSON output
|
| 86 |
+
|
| 87 |
+
#### **4. Risk Analysis**
|
| 88 |
+
```python
|
| 89 |
+
analyze_deed_risks(clauses: str, classification: str) -> JSON
|
| 90 |
+
```
|
| 91 |
+
- Clause-by-clause risk assessment
|
| 92 |
+
- Categories: TITLE, WARRANTY, ENCUMBRANCE, EASEMENT, RESTRICTION
|
| 93 |
+
- Risk levels with explanations and recommendations
|
| 94 |
+
|
| 95 |
+
#### **5. Comprehensive Report Generation**
|
| 96 |
+
```python
|
| 97 |
+
generate_comprehensive_deed_report(pdf_path: str) -> JSON
|
| 98 |
+
```
|
| 99 |
+
- Orchestrates all tools in a pipeline
|
| 100 |
+
- Returns complete analysis report
|
| 101 |
+
- Single-command full analysis
|
| 102 |
+
|
| 103 |
+
### Tech Stack
|
| 104 |
+
|
| 105 |
+
- **MCP Framework**: Model Context Protocol for tool orchestration
|
| 106 |
+
- **Gradio 4**: Web interface
|
| 107 |
+
- **FastAPI**: REST API backend
|
| 108 |
+
- **Nebius Qwen2.5-VL-72B**: Vision model for OCR
|
| 109 |
+
- **Meta Llama-3.3-70B**: LLM for legal analysis
|
| 110 |
+
- **PyMuPDF**: PDF processing
|
| 111 |
+
|
| 112 |
+
## 📊 Sample Output
|
| 113 |
+
|
| 114 |
+
### Deed Classification
|
| 115 |
+
```json
|
| 116 |
+
{
|
| 117 |
+
"deed_type": "warranty",
|
| 118 |
+
"jurisdiction": {
|
| 119 |
+
"country": "United States",
|
| 120 |
+
"state_province": "Illinois"
|
| 121 |
+
},
|
| 122 |
+
"key_parties": {
|
| 123 |
+
"grantor": "Michael Austin Carter and wife Laura Jean Carter",
|
| 124 |
+
"grantee": "Husband and wife Address: 7421 Meadowbrook Drive"
|
| 125 |
+
},
|
| 126 |
+
"consideration_amount": "$250,000.00"
|
| 127 |
+
}
|
| 128 |
+
```
|
| 129 |
+
|
| 130 |
+
### Risk Analysis Example
|
| 131 |
+
```
|
| 132 |
+
RISK LEVEL: MEDIUM
|
| 133 |
+
CATEGORY: ENCUMBRANCE
|
| 134 |
+
|
| 135 |
+
EXPLANATION: The deed includes several exceptions and reservations that could
|
| 136 |
+
affect the property's value and usability, including unpaid real estate taxes
|
| 137 |
+
and existing easements.
|
| 138 |
+
|
| 139 |
+
RECOMMENDATION: Conduct a thorough title search to understand the full extent
|
| 140 |
+
of encumbrances and consult with a real estate attorney to assess their impact.
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
+
## 🎓 Use Cases
|
| 144 |
+
|
| 145 |
+
### For Property Buyers
|
| 146 |
+
- Understand risks before closing
|
| 147 |
+
- Identify unusual clauses
|
| 148 |
+
- Know what questions to ask your lawyer
|
| 149 |
+
|
| 150 |
+
### For Real Estate Lawyers
|
| 151 |
+
- Quick first-pass review
|
| 152 |
+
- Standardized risk assessment
|
| 153 |
+
- Time-saving for routine deeds
|
| 154 |
+
|
| 155 |
+
### For Mortgage Teams
|
| 156 |
+
- Automated security deed screening
|
| 157 |
+
- Risk flagging for approval workflow
|
| 158 |
+
- Compliance checking
|
| 159 |
+
|
| 160 |
+
### For Landlords
|
| 161 |
+
- Lease deed analysis
|
| 162 |
+
- Easement and restriction identification
|
| 163 |
+
- Future resale impact assessment
|
| 164 |
+
|
| 165 |
+
## ⚠️ Legal Disclaimer
|
| 166 |
+
|
| 167 |
+
**IMPORTANT:** This tool provides analysis for informational purposes only and **does not constitute legal advice**.
|
| 168 |
+
|
| 169 |
+
- Always consult with a qualified attorney licensed in your jurisdiction
|
| 170 |
+
- Legal requirements vary by location
|
| 171 |
+
- This tool cannot replace professional legal counsel
|
| 172 |
+
- Use this as a starting point for discussion with your lawyer
|
| 173 |
+
|
| 174 |
+
## 🔧 How It Works
|
| 175 |
+
|
| 176 |
+
### Multi-Step Reasoning Pipeline
|
| 177 |
+
|
| 178 |
+
The system uses intelligent multi-step reasoning:
|
| 179 |
+
|
| 180 |
+
1. **📄 Text Extraction** - Extracts text from PDF (direct or OCR)
|
| 181 |
+
2. **🔍 Classification** - Identifies deed type and jurisdiction
|
| 182 |
+
3. **✂️ Clause Segmentation** - Breaks document into logical sections
|
| 183 |
+
4. **⚖️ Risk Scoring** - Analyzes each clause for legal issues
|
| 184 |
+
5. **📝 Report Generation** - Compiles comprehensive analysis
|
| 185 |
+
|
| 186 |
+
### MCP Tool Orchestration
|
| 187 |
+
|
| 188 |
+
All tools are MCP-compliant and can be called individually or chained:
|
| 189 |
+
|
| 190 |
+
```python
|
| 191 |
+
# Example: Full analysis pipeline
|
| 192 |
+
report = generate_comprehensive_deed_report(pdf_path)
|
| 193 |
+
|
| 194 |
+
# Or: Individual tool calls
|
| 195 |
+
text = extract_text_from_deed_pdf(pdf_path)
|
| 196 |
+
classification = classify_deed_type(text)
|
| 197 |
+
clauses = split_deed_into_clauses(text)
|
| 198 |
+
risks = analyze_deed_risks(clauses, classification)
|
| 199 |
+
```
|
| 200 |
+
|
| 201 |
+
## 🏆 MCP-1st-Birthday Hackathon
|
| 202 |
+
|
| 203 |
+
This project was built for the **MCP-1st-Birthday Hackathon** in the **Productivity Track**.
|
| 204 |
+
|
| 205 |
+
### Why This Project Uses MCP
|
| 206 |
+
|
| 207 |
+
1. **Modularity** - Each legal analysis function is a separate MCP tool
|
| 208 |
+
2. **Composability** - Tools can be chained for complex workflows
|
| 209 |
+
3. **Reusability** - MCP tools work standalone or in pipelines
|
| 210 |
+
4. **Extensibility** - Easy to add new analysis tools (RAG, jurisdiction-specific logic)
|
| 211 |
+
5. **Interoperability** - Standard MCP interface for all tools
|
| 212 |
+
|
| 213 |
+
### Future Enhancements
|
| 214 |
+
|
| 215 |
+
- **RAG System**: Vector database with model clauses and legal precedents
|
| 216 |
+
- **Multi-Jurisdiction Support**: Country-specific risk assessments
|
| 217 |
+
- **Clause Comparison**: Visual diff against standard templates
|
| 218 |
+
- **Advanced Risk Scoring**: ML-based risk prediction
|
| 219 |
+
- **Multi-MCP Architecture**: Separate servers for PDF, RAG, and LLM
|
| 220 |
+
|
| 221 |
+
## 📚 Documentation
|
| 222 |
+
|
| 223 |
+
- **Main README**: Project documentation
|
| 224 |
+
- **CLAUDE.md**: AI assistant guidance for codebase
|
| 225 |
+
- **readme_main.md**: Detailed project guidelines
|
| 226 |
+
- **main_project.md**: Original requirements and roadmap
|
| 227 |
+
|
| 228 |
+
## 👥 Team
|
| 229 |
+
|
| 230 |
+
Built by the **Legal-AI Team** for MCP-1st-Birthday Hackathon:
|
| 231 |
+
- **Sojib**: Frontend (Gradio UI, report export)
|
| 232 |
+
- **Pial & Sojib**: MCP servers (PDF + RAG tools)
|
| 233 |
+
- **Takib**: LLM orchestration and legal prompts
|
| 234 |
+
|
| 235 |
+
## 🔗 Links
|
| 236 |
+
|
| 237 |
+
- **GitHub Repository**: [Legal-Deed-Reviewer](https://github.com/Nehlr1/Legal-Deed-Reviewer)
|
| 238 |
+
- **Hugging Face Space**: This Space!
|
| 239 |
+
- **MCP Documentation**: [Model Context Protocol](https://modelcontextprotocol.io/)
|
| 240 |
+
|
| 241 |
+
## 📄 License
|
| 242 |
+
|
| 243 |
+
Apache-2.0 License - See LICENSE file for details
|
| 244 |
+
|
| 245 |
+
---
|
| 246 |
+
|
| 247 |
+
**Made with ⚖️ for the MCP-1st-Birthday Hackathon**
|
| 248 |
+
|
| 249 |
+
*Empowering users to understand legal documents through AI*
|
app.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Hugging Face Space entry point for Legal Deed Reviewer.
|
| 3 |
+
This file is optimized for deployment on Hugging Face Spaces.
|
| 4 |
+
"""
|
| 5 |
+
import sys
|
| 6 |
+
import os
|
| 7 |
+
|
| 8 |
+
# Ensure the current directory is in the path
|
| 9 |
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 10 |
+
|
| 11 |
+
# Import the main Gradio app
|
| 12 |
+
from web_app import app as gradio_app
|
| 13 |
+
|
| 14 |
+
# Launch for Hugging Face Spaces
|
| 15 |
+
if __name__ == "__main__":
|
| 16 |
+
# Hugging Face Spaces will handle port assignment
|
| 17 |
+
gradio_app.launch(
|
| 18 |
+
server_name="0.0.0.0",
|
| 19 |
+
server_port=7860, # Default Gradio port for Spaces
|
| 20 |
+
share=False, # No need to share on Spaces
|
| 21 |
+
show_error=True
|
| 22 |
+
)
|
main.py
ADDED
|
@@ -0,0 +1,729 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import asyncio
|
| 2 |
+
import base64
|
| 3 |
+
import json
|
| 4 |
+
import mimetypes
|
| 5 |
+
import os
|
| 6 |
+
import pathlib
|
| 7 |
+
import re
|
| 8 |
+
from typing import Dict, List, Any, Optional
|
| 9 |
+
|
| 10 |
+
import fitz # PyMuPDF
|
| 11 |
+
import httpx
|
| 12 |
+
from dotenv import load_dotenv
|
| 13 |
+
from openai import OpenAI
|
| 14 |
+
from mcp.server.fastmcp import FastMCP
|
| 15 |
+
|
| 16 |
+
# Load variables from a local .env file if present (no-op otherwise).
load_dotenv()
# Prefer the Nebius key; fall back to an OpenAI key if only that is set.
api_key = os.environ.get("NEBIUS_API_KEY") or os.environ.get("OPENAI_API_KEY")
if not api_key:
    # Fail fast at import time: every tool below needs an authenticated client.
    raise RuntimeError("NEBIUS_API_KEY or OPENAI_API_KEY must be set")

# OpenAI-compatible client pointed at the Nebius endpoint.
# NOTE(review): even when OPENAI_API_KEY is the fallback credential, requests
# still go to the Nebius base_url — confirm that is intended.
client = OpenAI(base_url="https://api.tokenfactory.nebius.com/v1/", api_key=api_key)

# MCP server instance; functions below register themselves via @mcp.tool().
mcp = FastMCP("deed-legal-mcp")
|
| 24 |
+
|
| 25 |
+
# Legal system prompts
|
| 26 |
+
SYSTEM_DEED_LAWYER = """
|
| 27 |
+
You are an expert conveyancing lawyer reviewing deeds.
|
| 28 |
+
Your role is to:
|
| 29 |
+
1. Identify potential legal risks and issues
|
| 30 |
+
2. Classify deed types and extract key information
|
| 31 |
+
3. Explain risks in plain language for non-lawyers
|
| 32 |
+
4. Provide risk scores and categorizations
|
| 33 |
+
|
| 34 |
+
IMPORTANT: You do NOT provide legal advice. You only identify potential issues for review by a qualified lawyer.
|
| 35 |
+
Always include appropriate disclaimers about seeking professional legal counsel.
|
| 36 |
+
"""
|
| 37 |
+
|
| 38 |
+
CLASSIFY_DEED_PROMPT = """
|
| 39 |
+
Extract information from this deed document and return ONLY a valid JSON object. Do not include any explanatory text, code fences, markdown formatting, or any other content - just the raw JSON.
|
| 40 |
+
|
| 41 |
+
Required JSON structure:
|
| 42 |
+
{
|
| 43 |
+
"deed_type": "sale|mortgage|lease|gift|warranty|quitclaim|other",
|
| 44 |
+
"jurisdiction": {
|
| 45 |
+
"country": "country name",
|
| 46 |
+
"state_province": "state or province name"
|
| 47 |
+
},
|
| 48 |
+
"key_parties": {
|
| 49 |
+
"grantor": {
|
| 50 |
+
"name": "grantor name",
|
| 51 |
+
"address": "address if available"
|
| 52 |
+
},
|
| 53 |
+
"grantee": {
|
| 54 |
+
"name": "grantee name",
|
| 55 |
+
"address": "address if available"
|
| 56 |
+
},
|
| 57 |
+
"witnesses": []
|
| 58 |
+
},
|
| 59 |
+
"property_description_and_location": {
|
| 60 |
+
"district": "district name",
|
| 61 |
+
"area": "size/area",
|
| 62 |
+
"description": "property description"
|
| 63 |
+
},
|
| 64 |
+
"consideration_amount": "monetary amount if specified",
|
| 65 |
+
"date_of_execution": "date if available",
|
| 66 |
+
"special_conditions_or_restrictions": []
|
| 67 |
+
}
|
| 68 |
+
|
| 69 |
+
CRITICAL INSTRUCTIONS:
|
| 70 |
+
- Return ONLY the JSON object starting with { and ending with }
|
| 71 |
+
- NO ```json code fences
|
| 72 |
+
- NO markdown formatting
|
| 73 |
+
- NO explanatory text before or after
|
| 74 |
+
- Use "N/A" for missing information
|
| 75 |
+
- Ensure all strings are properly quoted
|
| 76 |
+
- Ensure all JSON syntax is valid
|
| 77 |
+
"""
|
| 78 |
+
|
| 79 |
+
RISK_ANALYSIS_PROMPT = """
|
| 80 |
+
Analyze the deed clauses for potential legal risks. Provide your analysis in this EXACT format:
|
| 81 |
+
|
| 82 |
+
RISK LEVEL: [LOW|MEDIUM|HIGH]
|
| 83 |
+
RISK CATEGORY: [TITLE|ENCUMBRANCE|WARRANTY|COVENANT|EASEMENT|RESTRICTION|OTHER]
|
| 84 |
+
EXPLANATION: [Plain language explanation of the risk and potential consequences]
|
| 85 |
+
RECOMMENDATION: [Recommended actions]
|
| 86 |
+
|
| 87 |
+
Focus on common deed issues like:
|
| 88 |
+
- Title defects or clouds
|
| 89 |
+
- Undisclosed encumbrances
|
| 90 |
+
- Warranty limitations
|
| 91 |
+
- Easement problems
|
| 92 |
+
- Restrictive covenants
|
| 93 |
+
- Boundary disputes
|
| 94 |
+
- Missing signatures or witnesses
|
| 95 |
+
|
| 96 |
+
Important: Start your response with "RISK LEVEL:" and follow the exact format above.
|
| 97 |
+
"""
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def _data_url_from_bytes(data: bytes, mime: str) -> str:
|
| 101 |
+
b64 = base64.b64encode(data).decode("ascii")
|
| 102 |
+
return f"data:{mime};base64,{b64}"
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def _clean_llm_json_response(response: str) -> str:
|
| 106 |
+
"""Clean LLM response to extract valid JSON."""
|
| 107 |
+
# Remove leading/trailing whitespace
|
| 108 |
+
cleaned = response.strip()
|
| 109 |
+
|
| 110 |
+
# Remove code fences if present
|
| 111 |
+
if cleaned.startswith("```"):
|
| 112 |
+
lines = cleaned.split("\n")
|
| 113 |
+
# Remove first line with ```json or ```
|
| 114 |
+
lines = lines[1:]
|
| 115 |
+
# Remove last line with ```
|
| 116 |
+
if lines and lines[-1].strip() == "```":
|
| 117 |
+
lines = lines[:-1]
|
| 118 |
+
cleaned = "\n".join(lines).strip()
|
| 119 |
+
|
| 120 |
+
# Look for JSON object boundaries
|
| 121 |
+
start_idx = cleaned.find("{")
|
| 122 |
+
end_idx = cleaned.rfind("}") + 1
|
| 123 |
+
|
| 124 |
+
if start_idx != -1 and end_idx > start_idx:
|
| 125 |
+
cleaned = cleaned[start_idx:end_idx]
|
| 126 |
+
|
| 127 |
+
# Remove any remaining non-JSON text before or after
|
| 128 |
+
lines = cleaned.split("\n")
|
| 129 |
+
json_lines = []
|
| 130 |
+
in_json = False
|
| 131 |
+
|
| 132 |
+
for line in lines:
|
| 133 |
+
stripped = line.strip()
|
| 134 |
+
if stripped.startswith("{") or in_json:
|
| 135 |
+
in_json = True
|
| 136 |
+
json_lines.append(line)
|
| 137 |
+
if stripped.endswith("}") and line.count("{") <= line.count("}"):
|
| 138 |
+
break
|
| 139 |
+
|
| 140 |
+
if json_lines:
|
| 141 |
+
cleaned = "\n".join(json_lines)
|
| 142 |
+
|
| 143 |
+
return cleaned.strip()
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def _path_to_data_url(path: pathlib.Path) -> str:
|
| 147 |
+
mime = mimetypes.guess_type(path.name)[0] or "image/png"
|
| 148 |
+
return _data_url_from_bytes(path.read_bytes(), mime)
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
async def _prepare_image_payload(image_input: str) -> str:
|
| 152 |
+
"""
|
| 153 |
+
Accept local paths, http(s) URLs, or data URLs and normalize to a data URL string.
|
| 154 |
+
"""
|
| 155 |
+
if image_input.startswith("data:"):
|
| 156 |
+
return image_input
|
| 157 |
+
|
| 158 |
+
path = pathlib.Path(image_input).expanduser()
|
| 159 |
+
if path.exists():
|
| 160 |
+
return _path_to_data_url(path)
|
| 161 |
+
|
| 162 |
+
if image_input.startswith(("http://", "https://")):
|
| 163 |
+
async with httpx.AsyncClient() as http_client:
|
| 164 |
+
response = await http_client.get(image_input)
|
| 165 |
+
response.raise_for_status()
|
| 166 |
+
mime = (
|
| 167 |
+
response.headers.get("Content-Type")
|
| 168 |
+
or mimetypes.guess_type(image_input)[0]
|
| 169 |
+
or "image/png"
|
| 170 |
+
)
|
| 171 |
+
return _data_url_from_bytes(response.content, mime)
|
| 172 |
+
|
| 173 |
+
raise FileNotFoundError(f"Unable to locate image at {image_input}")
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def _postprocess_ocr_text(raw_text: str) -> str:
|
| 177 |
+
"""
|
| 178 |
+
Postprocess OCR text with layout preservation.
|
| 179 |
+
Preserves document structure while doing minimal cleanup.
|
| 180 |
+
"""
|
| 181 |
+
if not raw_text:
|
| 182 |
+
return ""
|
| 183 |
+
|
| 184 |
+
# Preserve layout formatting while doing minimal cleanup
|
| 185 |
+
# Only remove excessive empty lines (more than 2 consecutive newlines)
|
| 186 |
+
# while preserving the overall layout structure
|
| 187 |
+
cleaned_text = "\n".join([
|
| 188 |
+
line.rstrip() for line in raw_text.split("\n")
|
| 189 |
+
])
|
| 190 |
+
|
| 191 |
+
# Remove excessive blank lines (keep max 2 consecutive)
|
| 192 |
+
cleaned_text = re.sub(r'\n\s*\n\s*\n+', '\n\n', cleaned_text)
|
| 193 |
+
|
| 194 |
+
return cleaned_text
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
async def _run_ocr_completion(image_data_url: str) -> str:
    """Send an image data URL to the vision model and return the OCR'd text.

    The blocking OpenAI SDK call runs on the default thread-pool executor so
    the event loop stays responsive; the raw model output is then passed
    through layout-preserving postprocessing.
    """
    loop = asyncio.get_running_loop()

    def _call_api() -> str:
        # Synchronous SDK call; executed off-loop via run_in_executor below.
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-VL-72B-Instruct",  # vision model used as the OCR engine
            messages=[
                {
                    "role": "system",
                    "content": (
                        "You are an OCR assistant. Extract all text and keep layout if "
                        "possible. Note: Don't include HTML tags in your response."
                    ),
                },
                {
                    "role": "user",
                    # Image-only user turn: the instruction lives in the system message.
                    "content": [
                        {"type": "image_url", "image_url": {"url": image_data_url}}
                    ],
                },
            ],
        )
        return completion.choices[0].message.content

    raw_result = await loop.run_in_executor(None, _call_api)

    # Apply layout-preserving postprocessing (trim trailing whitespace,
    # collapse excessive blank lines) before returning.
    return _postprocess_ocr_text(raw_result)
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
@mcp.tool()
async def ocr_image(image: str) -> str:
    """
    Perform OCR on an image. Accepts a local path, http(s) URL, or data URL string.

    Args:
        image: Local file path, http(s) URL, or data URL of the image.

    Returns:
        The extracted text with layout-preserving cleanup applied.
    """
    # Normalize all accepted input forms to a single data URL, then OCR it.
    image_payload = await _prepare_image_payload(image)
    return await _run_ocr_completion(image_payload)
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
# Helper functions for legal processing
|
| 237 |
+
|
| 238 |
+
async def _run_llm_completion(messages: List[Dict], temperature: float = 0.1) -> str:
    """Run LLM completion for legal analysis.

    Args:
        messages: Chat messages in OpenAI chat-completion format.
        temperature: Sampling temperature; defaults low (0.1) for
            deterministic, extraction-style output.

    Returns:
        The assistant message content of the first choice.
    """
    loop = asyncio.get_running_loop()

    def _call_api() -> str:
        # NOTE(review): this text-analysis path reuses the Qwen *vision*
        # model; project docs mention Llama-3.3-70B for legal analysis —
        # confirm which model is intended here.
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-VL-72B-Instruct",
            messages=messages,
            temperature=temperature,
            max_tokens=4000
        )
        return completion.choices[0].message.content

    # Run the blocking SDK call on the default thread pool so the event
    # loop is not stalled.
    return await loop.run_in_executor(None, _call_api)
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
async def _extract_text_directly_from_pdf(pdf_path: str) -> Dict[str, Any]:
|
| 255 |
+
"""Fast direct text extraction from PDF (no OCR needed)."""
|
| 256 |
+
try:
|
| 257 |
+
pdf_document = fitz.open(pdf_path)
|
| 258 |
+
all_text = []
|
| 259 |
+
pages_data = []
|
| 260 |
+
total_chars = 0
|
| 261 |
+
|
| 262 |
+
for page_num in range(pdf_document.page_count):
|
| 263 |
+
page = pdf_document[page_num]
|
| 264 |
+
page_text = page.get_text().strip()
|
| 265 |
+
|
| 266 |
+
all_text.append(f"--- Page {page_num + 1} ---\n{page_text}")
|
| 267 |
+
pages_data.append({
|
| 268 |
+
"page": page_num + 1,
|
| 269 |
+
"text": page_text,
|
| 270 |
+
"length": len(page_text)
|
| 271 |
+
})
|
| 272 |
+
total_chars += len(page_text)
|
| 273 |
+
|
| 274 |
+
pdf_document.close()
|
| 275 |
+
full_text = "\n\n".join(all_text)
|
| 276 |
+
|
| 277 |
+
return {
|
| 278 |
+
"success": True,
|
| 279 |
+
"text": full_text,
|
| 280 |
+
"pages": pages_data,
|
| 281 |
+
"metadata": {
|
| 282 |
+
"total_pages": len(pages_data),
|
| 283 |
+
"method": "direct_text_extraction",
|
| 284 |
+
"total_length": total_chars,
|
| 285 |
+
"processing_time_seconds": "< 1"
|
| 286 |
+
}
|
| 287 |
+
}
|
| 288 |
+
|
| 289 |
+
except Exception as e:
|
| 290 |
+
return {
|
| 291 |
+
"success": False,
|
| 292 |
+
"text": "",
|
| 293 |
+
"pages": [],
|
| 294 |
+
"metadata": {},
|
| 295 |
+
"error": str(e)
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
async def _convert_pdf_pages_to_images(pdf_path: str) -> List[str]:
|
| 299 |
+
"""Convert PDF pages to image data URLs for use with existing OCR tool (fallback method)."""
|
| 300 |
+
try:
|
| 301 |
+
pdf_document = fitz.open(pdf_path)
|
| 302 |
+
image_data_urls = []
|
| 303 |
+
|
| 304 |
+
for page_num in range(pdf_document.page_count):
|
| 305 |
+
page = pdf_document[page_num]
|
| 306 |
+
# Convert page to image with lower resolution for faster processing
|
| 307 |
+
pix = page.get_pixmap(matrix=fitz.Matrix(1.5, 1.5)) # Reduced from 2.0 to 1.5
|
| 308 |
+
img_data = pix.tobytes("png")
|
| 309 |
+
|
| 310 |
+
# Convert to data URL that the existing ocr_image tool expects
|
| 311 |
+
data_url = _data_url_from_bytes(img_data, "image/png")
|
| 312 |
+
image_data_urls.append(data_url)
|
| 313 |
+
|
| 314 |
+
pdf_document.close()
|
| 315 |
+
return image_data_urls
|
| 316 |
+
|
| 317 |
+
except Exception as e:
|
| 318 |
+
raise RuntimeError(f"PDF to image conversion failed: {str(e)}")
|
| 319 |
+
|
| 320 |
+
|
| 321 |
+
def _split_deed_into_clauses(text: str) -> Dict[str, Any]:
|
| 322 |
+
"""Split deed text into logical clauses using pattern matching."""
|
| 323 |
+
clauses = []
|
| 324 |
+
|
| 325 |
+
# Common deed section patterns
|
| 326 |
+
section_patterns = [
|
| 327 |
+
(r"WITNESSETH[:\s].*?", "Recitals"),
|
| 328 |
+
(r"TO HAVE AND TO HOLD.*?", "Habendum Clause"),
|
| 329 |
+
(r"SUBJECT TO.*?", "Exceptions and Reservations"),
|
| 330 |
+
(r"COVENANT[S]?.*?", "Covenants"),
|
| 331 |
+
(r"WARRANTY.*?", "Warranty Clause"),
|
| 332 |
+
(r"IN WITNESS WHEREOF.*?", "Execution Clause"),
|
| 333 |
+
(r"GRANTETH.*?", "Granting Clause"),
|
| 334 |
+
(r"FOR AND IN CONSIDERATION.*?", "Consideration Clause"),
|
| 335 |
+
(r"EASEMENT[S]?.*?", "Easement"),
|
| 336 |
+
(r"RESTRICTION[S]?.*?", "Restrictions")
|
| 337 |
+
]
|
| 338 |
+
|
| 339 |
+
# Split by paragraphs and page breaks
|
| 340 |
+
text = re.sub(r'--- Page \d+ ---', '\n\n', text)
|
| 341 |
+
paragraphs = [p.strip() for p in re.split(r'\n\s*\n', text) if p.strip()]
|
| 342 |
+
|
| 343 |
+
clause_id = 1
|
| 344 |
+
for paragraph in paragraphs:
|
| 345 |
+
if len(paragraph) < 20: # Skip very short paragraphs
|
| 346 |
+
continue
|
| 347 |
+
|
| 348 |
+
# Try to identify clause type
|
| 349 |
+
clause_type = "General"
|
| 350 |
+
for pattern, ctype in section_patterns:
|
| 351 |
+
if re.search(pattern, paragraph, re.IGNORECASE):
|
| 352 |
+
clause_type = ctype
|
| 353 |
+
break
|
| 354 |
+
|
| 355 |
+
clauses.append({
|
| 356 |
+
"id": f"clause_{clause_id}",
|
| 357 |
+
"type": clause_type,
|
| 358 |
+
"text": paragraph,
|
| 359 |
+
"length": len(paragraph),
|
| 360 |
+
"word_count": len(paragraph.split())
|
| 361 |
+
})
|
| 362 |
+
clause_id += 1
|
| 363 |
+
|
| 364 |
+
return {
|
| 365 |
+
"success": True,
|
| 366 |
+
"clauses": clauses,
|
| 367 |
+
"total_clauses": len(clauses),
|
| 368 |
+
"metadata": {
|
| 369 |
+
"total_paragraphs": len(paragraphs),
|
| 370 |
+
"processing_method": "pattern_matching"
|
| 371 |
+
}
|
| 372 |
+
}
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
# NEW LEGAL DEED PROCESSING TOOLS
|
| 376 |
+
|
| 377 |
+
@mcp.tool()
async def extract_text_from_deed_pdf(pdf_path: str) -> str:
    """
    Extract text from a PDF deed document. Try fast direct text first, then OCR fallback.

    Args:
        pdf_path: Path to the PDF deed file

    Returns:
        JSON string with extracted text, pages, and metadata
    """
    try:
        source = pathlib.Path(pdf_path).expanduser()
        # Validate the input before doing any heavy work.
        if not source.exists():
            return json.dumps({"success": False, "error": f"PDF file not found: {pdf_path}"})
        if not pdf_path.lower().endswith(".pdf"):
            return json.dumps({"success": False, "error": "File must be a PDF document"})

        # Fast path: the PDF carries an embedded text layer, no OCR needed.
        direct = await _extract_text_directly_from_pdf(str(source))
        if direct.get("success") and len(direct.get("text", "")) > 50:
            print("🔍 Extracting text from PDF using direct text extraction...")
            return json.dumps(direct, indent=2)

        # Slow path: rasterize every page and run each image through OCR.
        print("🔍 Using OCR processing...")
        page_images = await _convert_pdf_pages_to_images(str(source))

        chunks = []
        pages = []
        for idx, image_url in enumerate(page_images, 1):
            print(f"📄 Processing page {idx}/{len(page_images)} with OCR...")
            ocr_text = await ocr_image(image_url)
            chunks.append(f"--- Page {idx} ---\n{ocr_text}")
            pages.append({"page": idx, "text": ocr_text, "length": len(ocr_text)})

        combined = "\n\n".join(chunks)
        payload = {
            "success": True,
            "text": combined,
            "pages": pages,
            "metadata": {
                "total_pages": len(pages),
                "method": "PDF_to_image_OCR_via_existing_tool",
                "total_length": len(combined)
            }
        }
        print(f"✅ OCR processing complete! {len(combined)} characters extracted")
        return json.dumps(payload, indent=2)
    except Exception as e:
        return json.dumps({"success": False, "error": f"PDF processing failed: {str(e)}"})
|
| 429 |
+
|
| 430 |
+
|
| 431 |
+
|
| 432 |
+
@mcp.tool()
async def split_deed_into_clauses(text: str) -> str:
    """
    Split deed text into logical clauses and sections.

    Args:
        text: The full deed text to analyze

    Returns:
        JSON string with identified clauses and their types
    """
    try:
        # Guard clause: reject missing or whitespace-only input up front.
        stripped = text.strip() if text else ""
        if not stripped:
            return json.dumps({
                "success": False,
                "error": "No text provided for clause analysis"
            })

        # Delegate the actual pattern-matching work to the module helper.
        return json.dumps(_split_deed_into_clauses(stripped), indent=2)

    except Exception as e:
        return json.dumps({
            "success": False,
            "error": f"Clause analysis failed: {str(e)}"
        })
|
| 458 |
+
|
| 459 |
+
|
| 460 |
+
@mcp.tool()
async def classify_deed_type(deed_text: str, metadata: Optional[str] = None) -> str:
    """
    Classify the deed type and extract key metadata.

    Args:
        deed_text: The full deed text
        metadata: Optional additional metadata about the deed

    Returns:
        JSON string with deed classification and extracted information
    """
    try:
        # Only the first 3000 characters are sent to keep the prompt bounded.
        user_content = f"{CLASSIFY_DEED_PROMPT}\n\nDEED TEXT:\n{deed_text[:3000]}"
        if metadata:
            user_content += f"\n\nADDITIONAL METADATA:\n{metadata}"

        raw_reply = await _run_llm_completion([
            {"role": "system", "content": SYSTEM_DEED_LAWYER},
            {"role": "user", "content": user_content}
        ])

        # Strip markdown fences / stray text before attempting to parse JSON.
        cleaned = _clean_llm_json_response(raw_reply)

        try:
            result = {
                "success": True,
                "classification": json.loads(cleaned),
                "raw_response": raw_reply
            }
        except json.JSONDecodeError as e:
            # Fall back to carrying the raw LLM text when parsing fails,
            # still reporting success so downstream display code can render it.
            print(f"🚨 JSON parsing failed for classification: {str(e)}")
            print(f"🔍 Original response: {raw_reply[:200]}...")
            print(f"🔍 Cleaned response: {cleaned[:200]}...")
            result = {
                "success": True,
                "classification": {"raw_analysis": raw_reply},
                "raw_response": raw_reply
            }

        return json.dumps(result, indent=2)

    except Exception as e:
        return json.dumps({
            "success": False,
            "error": f"Deed classification failed: {str(e)}"
        })
|
| 510 |
+
|
| 511 |
+
|
| 512 |
+
@mcp.tool()
async def analyze_deed_risks(clauses: str, deed_classification: Optional[str] = None) -> str:
    """
    Analyze legal risks in deed clauses (rule-based approach without RAG).

    Args:
        clauses: JSON string of deed clauses from split_deed_into_clauses
        deed_classification: Optional classification data from classify_deed_type

    Returns:
        JSON string with risk analysis for each clause
    """
    # FIX: `import re` previously sat *inside* the aggregation loop below and the
    # pattern was recompiled on every iteration; hoisted here, compiled once.
    import re
    risk_level_re = re.compile(r'RISK\s+LEVEL[:\s]+([A-Z]+)')

    try:
        # Parse clauses input
        try:
            clauses_data = json.loads(clauses) if isinstance(clauses, str) else clauses
            if not clauses_data.get("success") or not clauses_data.get("clauses"):
                return json.dumps({
                    "success": False,
                    "error": "Invalid clauses data provided"
                })
        except json.JSONDecodeError:
            return json.dumps({
                "success": False,
                "error": "Could not parse clauses JSON"
            })

        clause_list = clauses_data["clauses"]
        risks_analysis = []

        # Ask the LLM for a risk assessment of each clause individually.
        for clause in clause_list:
            clause_text = clause.get("text", "")
            clause_type = clause.get("type", "General")

            prompt = f"{RISK_ANALYSIS_PROMPT}\n\nCLAUSE TYPE: {clause_type}\nCLAUSE TEXT:\n{clause_text}"

            if deed_classification:
                prompt += f"\n\nDEED CONTEXT:\n{deed_classification}"

            messages = [
                {"role": "system", "content": SYSTEM_DEED_LAWYER},
                {"role": "user", "content": prompt}
            ]

            # Low temperature: risk extraction should be as deterministic as possible.
            risk_analysis = await _run_llm_completion(messages, temperature=0.2)
            print(f"📊 DEBUG Risk Analysis for {clause['id']} ({clause_type}): {risk_analysis[:200]}...")

            risks_analysis.append({
                "clause_id": clause["id"],
                "clause_type": clause_type,
                "risk_analysis": risk_analysis,
                "clause_length": clause.get("length", 0)
            })

        # Extract risk levels from individual analyses for aggregation.
        individual_risk_levels = []
        for risk_item in risks_analysis:
            analysis_text = risk_item.get("risk_analysis", "")
            risk_match = risk_level_re.search(analysis_text.upper())
            if risk_match:
                individual_risk_levels.append(risk_match.group(1))
            else:
                # Fallback: take the first bare risk keyword found anywhere in the text.
                for level in ["HIGH", "MEDIUM", "LOW"]:
                    if level in analysis_text.upper():
                        individual_risk_levels.append(level)
                        break

        # Overall risk is the worst risk seen in any individual clause.
        overall_risk_level = "LOW"  # Default
        if "HIGH" in individual_risk_levels:
            overall_risk_level = "HIGH"
        elif "MEDIUM" in individual_risk_levels:
            overall_risk_level = "MEDIUM"

        print(f"📊 DEBUG Individual risk levels found: {individual_risk_levels}")
        print(f"📊 DEBUG Calculated overall risk level: {overall_risk_level}")

        # Generate overall risk summary with structured format
        summary_prompt = f"""
Based on the following risk analyses of individual clauses, provide an overall risk assessment for this deed.

Calculated Overall Risk Level: {overall_risk_level}
Individual Clause Risk Levels: {individual_risk_levels}

Clause Risk Analyses:
{json.dumps(risks_analysis, indent=2)}

Provide your response in this EXACT format:

OVERALL RISK LEVEL: {overall_risk_level}
KEY FINDINGS:
- [Most critical issue 1]
- [Most critical issue 2]
- [Most critical issue 3]

RISK CATEGORIES FOUND: [List categories like TITLE, WARRANTY, etc.]
RECOMMENDATIONS:
- [Recommendation 1]
- [Recommendation 2]

DISCLAIMER: This analysis is for informational purposes only and does not constitute legal advice. Consult a qualified attorney for legal guidance.

Start your response with "OVERALL RISK LEVEL: {overall_risk_level}" and follow the exact format above.
"""

        summary_messages = [
            {"role": "system", "content": SYSTEM_DEED_LAWYER},
            {"role": "user", "content": summary_prompt}
        ]

        overall_summary = await _run_llm_completion(summary_messages)
        print(f"📊 DEBUG Overall Risk Summary: {overall_summary[:300]}...")

        result = {
            "success": True,
            "clause_risks": risks_analysis,
            "overall_summary": overall_summary,
            "total_clauses_analyzed": len(clause_list),
            "analysis_method": "rule_based_llm_analysis",
            "disclaimer": "This analysis is for informational purposes only and does not constitute legal advice. Consult a qualified attorney for legal guidance."
        }

        return json.dumps(result, indent=2)

    except Exception as e:
        return json.dumps({
            "success": False,
            "error": f"Risk analysis failed: {str(e)}"
        })
|
| 645 |
+
|
| 646 |
+
|
| 647 |
+
@mcp.tool()
async def generate_comprehensive_deed_report(pdf_path: str) -> str:
    """
    Generate a complete deed review report using all analysis tools.

    Pipeline: text extraction (direct/OCR) -> deed classification ->
    clause splitting -> per-clause risk analysis -> compiled JSON report.

    Args:
        pdf_path: Path to the PDF deed document

    Returns:
        JSON string with comprehensive deed analysis report
    """
    # Local import: the module import block is outside this view.
    import time

    try:
        print("🔍 Extracting text from PDF using existing OCR system...")
        text_result = await extract_text_from_deed_pdf(pdf_path)
        text_data = json.loads(text_result)

        if not text_data["success"]:
            return json.dumps({
                "success": False,
                "error": f"Could not extract text from PDF: {text_data.get('error')}"
            })

        deed_text = text_data["text"]

        print("📋 Classifying deed type...")
        classification_result = await classify_deed_type(deed_text)
        classification_data = json.loads(classification_result)

        print("✂️ Splitting into clauses...")
        clauses_result = await split_deed_into_clauses(deed_text)
        clauses_data = json.loads(clauses_result)

        print("⚠️ Analyzing legal risks...")
        risks_result = await analyze_deed_risks(
            clauses_result,
            json.dumps(classification_data.get("classification", {}))
        )
        risks_data = json.loads(risks_result)

        # Compile comprehensive report
        report = {
            "success": True,
            "pdf_path": pdf_path,
            "extraction_metadata": text_data.get("metadata", {}),
            "deed_classification": classification_data,
            "clause_breakdown": clauses_data,
            "risk_analysis": risks_data,
            "text_preview": deed_text,  # Full text instead of truncated preview
            "report_metadata": {
                # FIX: was asyncio.get_event_loop().time() — a monotonic clock with an
                # arbitrary epoch (and a deprecated call pattern inside a coroutine),
                # useless as a timestamp. time.time() is a real wall-clock epoch and
                # keeps the same float type for consumers of this field.
                "generated_at": time.time(),
                "analysis_steps": ["text_extraction_via_ocr", "classification", "clause_parsing", "risk_analysis"],
                "processing_method": "existing_ocr_system_reused"
            },
            "legal_disclaimer": {
                "notice": "This automated analysis is for informational purposes only.",
                "warning": "This does not constitute legal advice. Always consult with a qualified attorney.",
                "scope": "This analysis may not identify all potential legal issues.",
                "recommendation": "Have this deed reviewed by a licensed attorney before taking any action."
            }
        }

        print("✅ Comprehensive deed report generated successfully")
        return json.dumps(report, indent=2)

    except Exception as e:
        return json.dumps({
            "success": False,
            "error": f"Report generation failed: {str(e)}"
        })
|
| 716 |
+
|
| 717 |
+
|
| 718 |
+
if __name__ == "__main__":
    # Startup banner: list every registered MCP tool before handing control to the server.
    print("🏛️ Starting Legal Deed MCP Server...")
    print("📊 Available tools:")
    tool_lines = [
        "ocr_image: Original OCR for images (unchanged)",
        "extract_text_from_deed_pdf: Extract text from PDF deeds using existing OCR",
        "split_deed_into_clauses: Identify and categorize deed clauses",
        "classify_deed_type: Determine deed type and extract metadata",
        "analyze_deed_risks: Analyze legal risks without RAG system",
        "generate_comprehensive_deed_report: Complete deed analysis pipeline",
    ]
    for line in tool_lines:
        print(f" - {line}")
    print("\n⚖️ Legal Notice: This tool provides analysis only, not legal advice.")
    print("🚀 Server starting...")
    mcp.run()
|
requirements.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
httpx>=0.28.1
|
| 2 |
+
mcp[cli]>=1.21.2
|
| 3 |
+
openai>=2.8.1
|
| 4 |
+
fastapi>=0.104.1
|
| 5 |
+
uvicorn>=0.24.0
|
| 6 |
+
gradio>=6.0.0
|
| 7 |
+
python-multipart>=0.0.6
|
| 8 |
+
python-dotenv>=1.0.0
|
| 9 |
+
pillow>=11.3.0
|
| 10 |
+
langgraph>=0.2.0
|
| 11 |
+
langchain>=0.3.0
|
| 12 |
+
langchain-core>=0.3.0
|
| 13 |
+
PyMuPDF>=1.23.0
|
| 14 |
+
reportlab>=4.0.0
|
web_app.py
ADDED
|
@@ -0,0 +1,1632 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
FastAPI web application with Gradio interface for Legal Deed Review.
"""
import asyncio
import json
import os
import re  # FIX: was imported twice (lines 7 and 9); duplicate removed
import tempfile
from pathlib import Path
from typing import Dict, Any, Optional, List

import gradio as gr
from dotenv import load_dotenv
from fastapi import FastAPI, File, UploadFile, HTTPException

# Import the functions from main.py
from main import (
    extract_text_from_deed_pdf,
    split_deed_into_clauses,
    classify_deed_type,
    analyze_deed_risks,
    generate_comprehensive_deed_report
)

load_dotenv()

app = FastAPI(
    title="Legal Deed Review System",
    description="Upload PDF deed documents for comprehensive legal risk analysis"
)

# Legal disclaimers
LEGAL_DISCLAIMER = """
⚖️ **LEGAL DISCLAIMER**

**This is an automated analysis tool for informational purposes only.**

- ❌ This does NOT constitute legal advice
- ❌ This does NOT replace consultation with a qualified attorney
- ❌ This analysis may NOT identify all potential legal issues
- ✅ Always have deeds reviewed by a licensed attorney before taking action
- ✅ Consult local legal professionals familiar with your jurisdiction

**By using this tool, you acknowledge these limitations.**
"""

# Emoji markers used when rendering per-clause risk levels in the UI.
RISK_LEVEL_COLORS = {
    "LOW": "🟢",
    "MEDIUM": "🟡",
    "HIGH": "🔴"
}
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
async def process_deed_pdf(pdf_file) -> Dict[str, Any]:
    """Process a PDF deed file and return the raw report data.

    Accepts the several shapes Gradio may hand us (file-like object, path
    string, or raw bytes), stages the content in a temp file, and runs the
    full analysis pipeline from main.py.

    Returns:
        The parsed report dict on success, or {"error": message} on failure.
    """
    if pdf_file is None:
        return {"error": "Please upload a PDF deed document first."}

    tmp_path = None
    try:
        # Handle different Gradio file input formats
        with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
            tmp_path = tmp_file.name  # record immediately so cleanup always runs
            if hasattr(pdf_file, 'read'):
                # File-like object
                tmp_file.write(pdf_file.read())
            elif isinstance(pdf_file, str):
                # File path string
                with open(pdf_file, 'rb') as f:
                    tmp_file.write(f.read())
            elif isinstance(pdf_file, bytes):
                # Raw bytes
                tmp_file.write(pdf_file)
            else:
                return {"error": f"Unsupported file format: {type(pdf_file)}"}

        # Generate comprehensive report
        report_result = await generate_comprehensive_deed_report(tmp_path)
        report_data = json.loads(report_result)

        if not report_data.get("success"):
            return {"error": f"Analysis failed: {report_data.get('error', 'Unknown error')}"}

        return report_data

    except Exception as e:
        return {"error": f"Error processing deed: {str(e)}"}
    finally:
        # FIX: cleanup was only reached on the happy path, leaking the temp file
        # whenever the pipeline raised or the input format was unsupported.
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def format_classification_display(classification_data: Dict) -> str:
    """Render deed classification results as a markdown summary string."""
    if not classification_data or not classification_data.get("success"):
        return "❌ Classification failed"

    info = classification_data.get("classification", {})

    # The LLM sometimes returns the payload as a JSON string; decode defensively.
    if isinstance(info, str):
        try:
            info = json.loads(info)
        except Exception:
            info = {}

    # Unwrap one level of nesting when the model echoed a "classification" wrapper.
    if "classification" in info:
        info = info["classification"]

    # Unparseable responses carry the raw LLM text instead of structured fields.
    if "raw_analysis" in info:
        return f"📋 **Deed Classification (LLM):**\n\n{info['raw_analysis']}"

    lines = []

    kind = info.get("deed_type") or info.get("type")
    if kind:
        lines.append(f"• **Deed Type:** {kind}")

    where = info.get("jurisdiction") or info.get("jurisdiction_hint")
    if where:
        if isinstance(where, dict):
            # Format jurisdiction nicely
            country = where.get("country", "")
            state = where.get("state_province", "") or where.get("state", "")
            if country and state:
                rendered = f"{country}, {state}"
            elif country:
                rendered = country
            else:
                rendered = json.dumps(where, indent=2)
            lines.append(f"• **Jurisdiction:** {rendered}")
        else:
            lines.append(f"• **Jurisdiction:** {where}")

    who = info.get("key_parties") or info.get("parties")
    if who:
        if isinstance(who, dict):
            # Format parties nicely
            entries = []
            for role, detail in who.items():
                if isinstance(detail, dict) and "name" in detail:
                    entries.append(f" - {role.title()}: {detail['name']}")
                else:
                    entries.append(f" - {role.title()}: {detail}")
            if entries:
                lines.append("• **Parties:**\n" + "\n".join(entries))
            else:
                lines.append(f"• **Parties:** {json.dumps(who, indent=2)}")
        else:
            lines.append(f"• **Parties:** {who}")

    prop = (
        info.get("property_description_and_location")
        or info.get("property_description")
        or info.get("property")
    )
    if prop:
        if isinstance(prop, dict):
            # Format property description nicely
            details = []
            for field, value in prop.items():
                if value and str(value).strip():
                    details.append(f" - {field.replace('_', ' ').title()}: {value}")
            if details:
                lines.append("• **Property:**\n" + "\n".join(details))
            else:
                lines.append(f"• **Property:** {json.dumps(prop, indent=2)}")
        else:
            lines.append(f"• **Property:** {prop}")

    price = info.get("consideration_amount") or info.get("consideration")
    if price:
        lines.append(f"• **Consideration:** {price}")

    conditions = info.get("special_conditions_or_restrictions")
    if conditions:
        if isinstance(conditions, (dict, list)):
            lines.append(f"• **Special Conditions:** {json.dumps(conditions, indent=2)}")
        else:
            lines.append(f"• **Special Conditions:** {conditions}")

    return "📋 **Deed Classification:**\n\n" + "\n".join(lines)
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
def _safe_preview(text: str, limit: int = 200) -> str:
|
| 186 |
+
return text[:limit] + ("..." if len(text) > limit else "")
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def format_clause_table(clause_data: Dict) -> List[List[Any]]:
    """Build clause-breakdown rows (one list per clause) for a Gradio DataFrame.

    Each row follows the header order ["id", "type", "words", "preview"];
    the full clause text is used as the preview (no truncation).
    Returns an empty list when no data was provided or processing failed.
    """
    print(f"🔍 DEBUG format_clause_table input: {clause_data}")

    # Nothing came back from the clause-extraction step.
    if not clause_data:
        print("🚨 DEBUG: No clause data provided")
        return []

    # The step ran but flagged a failure: render an empty table.
    if not clause_data.get("success"):
        print(f"🚨 DEBUG: Clause data processing failed: {clause_data}")
        return []

    clauses = clause_data.get("clauses", [])
    print(f"🔍 DEBUG: Found {len(clauses)} clauses")

    rows: List[List[Any]] = []
    for i, clause in enumerate(clauses):
        clause_id = clause.get("id", f"clause_{i+1}")
        clause_type = clause.get("type", "General")
        word_count = clause.get("word_count", 0)
        # Row order must match the DataFrame headers declared elsewhere.
        rows.append([clause_id, clause_type, word_count, clause.get("text", "")])
        print(f"🔍 DEBUG: Clause {i+1}: {clause_id} ({clause_type}) - {word_count} words")

    print(f"🔍 DEBUG format_clause_table output: {len(rows)} rows")
    return rows
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def _flatten_json(data: Any, parent_key: str = "") -> List[tuple]:
|
| 222 |
+
"""Flatten nested JSON into dotted keys."""
|
| 223 |
+
items: List[tuple] = []
|
| 224 |
+
if isinstance(data, dict):
|
| 225 |
+
for k, v in data.items():
|
| 226 |
+
new_key = f"{parent_key}.{k}" if parent_key else k
|
| 227 |
+
items.extend(_flatten_json(v, new_key))
|
| 228 |
+
elif isinstance(data, list):
|
| 229 |
+
for idx, v in enumerate(data):
|
| 230 |
+
new_key = f"{parent_key}[{idx}]" if parent_key else f"[{idx}]"
|
| 231 |
+
items.extend(_flatten_json(v, new_key))
|
| 232 |
+
else:
|
| 233 |
+
items.append((parent_key, data))
|
| 234 |
+
return items
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
def format_classification_table(classification_data: Dict) -> List[List[Any]]:
    """Prepare deed classification as table rows with readable nested formatting.

    Accepts the result dict from the classification step — the payload may be
    nested under a "classification" key and may arrive as a JSON string
    (possibly wrapped in markdown code fences). Returns a list of
    [field_name, pretty_printed_value] rows for a Gradio DataFrame.
    """
    if not classification_data:
        return []

    def _clean_json_response(response: str) -> str:
        """Clean JSON response similar to main.py function."""
        cleaned = response.strip()

        # Remove code fences if present
        if cleaned.startswith("```"):
            lines = cleaned.split("\n")
            lines = lines[1:]  # Remove first line
            if lines and lines[-1].strip() == "```":
                lines = lines[:-1]  # Remove last line
            cleaned = "\n".join(lines).strip()

        # Look for JSON object boundaries
        start_idx = cleaned.find("{")
        end_idx = cleaned.rfind("}") + 1

        if start_idx != -1 and end_idx > start_idx:
            cleaned = cleaned[start_idx:end_idx]

        return cleaned.strip()

    # Handle the data structure from the LLM response
    print(f"🔍 DEBUG format_classification_table input: {classification_data}")

    # Extract classification from nested structure
    if "classification" in classification_data:
        classification = classification_data.get("classification", {})
    else:
        classification = classification_data

    # Handle stringified JSON (including problematic formats)
    if isinstance(classification, str):
        try:
            # Try multiple cleaning approaches
            cleaned = _clean_json_response(classification)
            classification = json.loads(cleaned)
            print(f"🔍 DEBUG Successfully parsed JSON from string: {type(classification)}")
        except Exception as e:
            print(f"🚨 DEBUG JSON parsing failed: {e}")
            # If it looks like it might be raw analysis text, don't treat as JSON
            # (long strings are assumed to be free-form analysis, not key/value data).
            if "raw_analysis" in classification or len(classification) > 500:
                classification = {"raw_analysis": classification}
            else:
                # Try to extract key info from the string
                classification = _extract_basic_info_from_string(classification)

    # Unwrap nested classification if present
    if isinstance(classification, dict) and "classification" in classification:
        classification = classification["classification"]

    # Last-resort fallback: wrap any non-dict payload so it still renders as one row.
    if not isinstance(classification, dict):
        classification = {"value": str(classification)}

    def _pretty(value: Any, indent: int = 0) -> str:
        """Recursively render nested dicts/lists as an indented plain-text tree."""
        pad = " " * indent
        if isinstance(value, dict):
            lines = []
            for k, v in value.items():
                if isinstance(v, (dict, list)) and v:  # Only show non-empty nested items
                    lines.append(f"{pad}{k.replace('_', ' ').title()}:")
                    lines.append(_pretty(v, indent + 1))
                elif not isinstance(v, (dict, list)):  # Show simple values
                    display_val = str(v) if v not in [None, "", "N/A"] else "N/A"
                    lines.append(f"{pad}{k.replace('_', ' ').title()}: {display_val}")
            return "\n".join(lines)
        elif isinstance(value, list):
            if not value:  # Empty list
                return f"{pad}(None)"
            lines = []
            for idx, v in enumerate(value, 1):
                if isinstance(v, (dict, list)):
                    lines.append(f"{pad}{idx}.")
                    lines.append(_pretty(v, indent + 1))
                else:
                    lines.append(f"{pad}{idx}. {v}")
            return "\n".join(lines)
        return f"{pad}{value}"

    # One table row per top-level classification field, pretty-printed.
    rows: List[List[Any]] = []
    for key, value in classification.items():
        display_key = key.replace('_', ' ').title()
        rows.append([display_key, _pretty(value)])

    print(f"🔍 DEBUG format_classification_table output: {len(rows)} rows")
    return rows
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
def _extract_basic_info_from_string(text: str) -> Dict[str, str]:
|
| 331 |
+
"""Extract basic info from problematic string responses."""
|
| 332 |
+
# Basic fallback - try to find key information
|
| 333 |
+
result = {}
|
| 334 |
+
|
| 335 |
+
# Try to extract deed type
|
| 336 |
+
if any(word in text.lower() for word in ["sale", "purchase", "buy"]):
|
| 337 |
+
result["deed_type"] = "sale"
|
| 338 |
+
elif "mortgage" in text.lower():
|
| 339 |
+
result["deed_type"] = "mortgage"
|
| 340 |
+
elif "lease" in text.lower():
|
| 341 |
+
result["deed_type"] = "lease"
|
| 342 |
+
else:
|
| 343 |
+
result["deed_type"] = "unknown"
|
| 344 |
+
|
| 345 |
+
# Try to find jurisdiction
|
| 346 |
+
if "bangladesh" in text.lower():
|
| 347 |
+
result["jurisdiction"] = "Bangladesh"
|
| 348 |
+
|
| 349 |
+
# If we can't extract much, show the raw text
|
| 350 |
+
if len(result) < 2:
|
| 351 |
+
result["raw_analysis"] = text[:500] + "..." if len(text) > 500 else text
|
| 352 |
+
|
| 353 |
+
return result
|
| 354 |
+
|
| 355 |
+
|
| 356 |
+
def _extract_risk_level(risk_text: str) -> str:
|
| 357 |
+
"""Extract risk level from risk analysis text using multiple patterns."""
|
| 358 |
+
print(f"📊 DEBUG _extract_risk_level input: {risk_text[:200]}...")
|
| 359 |
+
|
| 360 |
+
if not risk_text or not isinstance(risk_text, str):
|
| 361 |
+
print(f"📊 DEBUG Invalid input, returning UNKNOWN")
|
| 362 |
+
return "UNKNOWN"
|
| 363 |
+
|
| 364 |
+
text_upper = risk_text.upper()
|
| 365 |
+
|
| 366 |
+
# Try different patterns for risk levels
|
| 367 |
+
risk_patterns = [
|
| 368 |
+
# Direct matches
|
| 369 |
+
(r'\bRISK\s+LEVEL[:\s]+HIGH\b', 'HIGH'),
|
| 370 |
+
(r'\bRISK\s+LEVEL[:\s]+MEDIUM\b', 'MEDIUM'),
|
| 371 |
+
(r'\bRISK\s+LEVEL[:\s]+LOW\b', 'LOW'),
|
| 372 |
+
|
| 373 |
+
# Simple matches
|
| 374 |
+
(r'\bHIGH\s+RISK\b', 'HIGH'),
|
| 375 |
+
(r'\bMEDIUM\s+RISK\b', 'MEDIUM'),
|
| 376 |
+
(r'\bLOW\s+RISK\b', 'LOW'),
|
| 377 |
+
|
| 378 |
+
# Pattern: "Risk: HIGH" or "Risk Level: MEDIUM"
|
| 379 |
+
(r'\bRISK[:\s]+HIGH\b', 'HIGH'),
|
| 380 |
+
(r'\bRISK[:\s]+MEDIUM\b', 'MEDIUM'),
|
| 381 |
+
(r'\bRISK[:\s]+LOW\b', 'LOW'),
|
| 382 |
+
|
| 383 |
+
# Standalone mentions (fallback)
|
| 384 |
+
(r'\bHIGH\b', 'HIGH'),
|
| 385 |
+
(r'\bMEDIUM\b', 'MEDIUM'),
|
| 386 |
+
(r'\bLOW\b', 'LOW'),
|
| 387 |
+
]
|
| 388 |
+
|
| 389 |
+
import re
|
| 390 |
+
for pattern, level in risk_patterns:
|
| 391 |
+
if re.search(pattern, text_upper):
|
| 392 |
+
print(f"📊 DEBUG Found risk level '{level}' using pattern: {pattern}")
|
| 393 |
+
return level
|
| 394 |
+
|
| 395 |
+
print(f"📊 DEBUG No risk level found in text, returning UNKNOWN")
|
| 396 |
+
return "UNKNOWN"
|
| 397 |
+
|
| 398 |
+
|
| 399 |
+
def format_risk_table(risk_data: Dict) -> List[List[Any]]:
    """Build risk-analysis rows (one list per clause) for a Gradio DataFrame.

    Each row follows the header order ["clause_id", "clause_type",
    "risk_level", "summary"]; the full analysis text is used as the summary.
    Returns an empty list when no data was provided or processing failed.
    """
    print(f"🔍 DEBUG format_risk_table input: {risk_data}")

    # Nothing came back from the risk-analysis step.
    if not risk_data:
        print("🚨 DEBUG: No risk data provided")
        return []

    # The step ran but flagged a failure: render an empty table.
    if not risk_data.get("success"):
        print(f"🚨 DEBUG: Risk data processing failed: {risk_data}")
        return []

    clause_risks = risk_data.get("clause_risks", [])
    print(f"🔍 DEBUG: Found {len(clause_risks)} clause risks")

    rows: List[List[Any]] = []
    for i, risk in enumerate(clause_risks):
        clause_id = risk.get("clause_id", f"clause_{i+1}")
        clause_type = risk.get("clause_type", "General")
        analysis = risk.get("risk_analysis", "")
        # Derive the categorical level from the free-form analysis text.
        risk_level = _extract_risk_level(analysis)

        print(f"📊 DEBUG Risk {i+1}: clause_id={clause_id}, clause_type={clause_type}, extracted_risk_level={risk_level}")
        print(f"📊 DEBUG Risk {i+1}: analysis_preview={analysis[:200]}...")

        # Row order must match the DataFrame headers declared elsewhere.
        rows.append([clause_id, clause_type, risk_level, analysis])
        print(f"🔍 DEBUG: Risk {i+1}: {clause_id} ({clause_type}) - {risk_level}")

    print(f"🔍 DEBUG format_risk_table output: {len(rows)} rows")
    return rows
|
| 432 |
+
|
| 433 |
+
|
| 434 |
+
def format_risk_overview(risk_data: Dict) -> str:
    """Format the overall risk summary, prefixed with any consistency notices.

    Args:
        risk_data: Result dict from the risk-analysis step; expected keys are
            "success", "overall_summary", "disclaimer" and "clause_risks".

    Returns:
        A markdown string for display, or a failure notice when risk_data is
        missing or flagged unsuccessful.
    """
    # Guard against a missing/None dict too — previously only the "success"
    # flag was checked, so risk_data=None raised AttributeError. This makes
    # the function consistent with format_risk_table/format_clause_table.
    if not risk_data or not risk_data.get("success"):
        return "❌ Risk analysis failed"

    overall_summary = risk_data.get("overall_summary", "")
    disclaimer = risk_data.get("disclaimer", "")
    clause_risks = risk_data.get("clause_risks", [])

    # Validate consistency between overall and individual risk levels
    validation_warnings = _validate_risk_consistency(overall_summary, clause_risks)

    parts = ["⚠️ **Risk Analysis Overview:**"]

    # Surface validation notices ahead of the summary so reviewers see them first.
    if validation_warnings:
        parts.append("🔍 **Validation Notices:**")
        parts.extend([f"• {warning}" for warning in validation_warnings])
        parts.append("---")

    if overall_summary:
        parts.append(overall_summary)
    if disclaimer:
        parts.append(f"\n_{disclaimer}_")
    return "\n\n".join(parts)
|
| 459 |
+
|
| 460 |
+
|
| 461 |
+
def _validate_risk_consistency(overall_summary: str, clause_risks: List[Dict]) -> List[str]:
    """Validate consistency between overall risk summary and individual clause risks.

    Compares the overall level stated in *overall_summary* against the levels
    extracted from each clause's "risk_analysis" text and returns a list of
    human-readable warning strings (empty when nothing is inconsistent or
    when either input is empty).
    """
    warnings = []

    # Nothing to cross-check without both sides.
    if not overall_summary or not clause_risks:
        return warnings

    # Extract overall risk level from summary — first try the exact
    # structured phrasings the prompt asks the model to emit.
    overall_risk_level = "UNKNOWN"
    overall_upper = overall_summary.upper()
    for level in ["HIGH", "MEDIUM", "LOW"]:
        if f"OVERALL RISK LEVEL: {level}" in overall_upper or f"RISK LEVEL: {level}" in overall_upper:
            overall_risk_level = level
            break

    # If not found in structured format, try fallback patterns
    # (loose regexes tolerating arbitrary text between the keywords;
    # relies on the module-level `re` import).
    if overall_risk_level == "UNKNOWN":
        overall_risk_patterns = [
            (r'OVERALL.*RISK.*LEVEL.*HIGH', 'HIGH'),
            (r'OVERALL.*RISK.*LEVEL.*MEDIUM', 'MEDIUM'),
            (r'OVERALL.*RISK.*LEVEL.*LOW', 'LOW'),
            (r'RISK.*LEVEL.*HIGH', 'HIGH'),
            (r'RISK.*LEVEL.*MEDIUM', 'MEDIUM'),
            (r'RISK.*LEVEL.*LOW', 'LOW'),
        ]

        for pattern, level in overall_risk_patterns:
            if re.search(pattern, overall_upper):
                overall_risk_level = level
                break

    # Extract individual clause risk levels (UNKNOWNs are excluded here and
    # counted separately below).
    individual_levels = []
    for clause_risk in clause_risks:
        analysis = clause_risk.get("risk_analysis", "")
        extracted_level = _extract_risk_level(analysis)
        if extracted_level != "UNKNOWN":
            individual_levels.append(extracted_level)

    print(f"📊 DEBUG Risk Validation - Overall: {overall_risk_level}, Individual: {individual_levels}")

    # Check for consistency
    if overall_risk_level != "UNKNOWN" and individual_levels:
        # Check if overall level makes sense given individual levels
        has_high = "HIGH" in individual_levels
        has_medium = "MEDIUM" in individual_levels

        if overall_risk_level == "LOW" and has_high:
            warnings.append(f"Overall risk shows {overall_risk_level} but found HIGH risk clauses")
        elif overall_risk_level == "LOW" and has_medium:
            warnings.append(f"Overall risk shows {overall_risk_level} but found MEDIUM risk clauses")
        elif overall_risk_level == "HIGH" and not has_high and not has_medium:
            warnings.append(f"Overall risk shows {overall_risk_level} but no HIGH or MEDIUM risk clauses found")

    # Check for missing individual risk classifications
    # NOTE(review): this re-runs _extract_risk_level on every clause (with its
    # debug prints); the count could be derived from individual_levels instead.
    unknown_count = sum(1 for clause_risk in clause_risks if _extract_risk_level(clause_risk.get("risk_analysis", "")) == "UNKNOWN")
    if unknown_count > 0:
        warnings.append(f"{unknown_count} clause(s) have unclear risk levels")

    return warnings
|
| 521 |
+
|
| 522 |
+
|
| 523 |
+
def extract_metadata_fields(classification_data: Dict) -> Dict[str, str]:
    """Pull key metadata fields for sidebar display.

    Normalizes the classification payload (which may be nested, stringified
    JSON, or raw analysis text) and returns display-ready strings for the
    keys: "deed_type", "jurisdiction", "parties", "property", "consideration".
    Missing values are rendered as "N/A".
    """
    # Debug print to see what we're getting
    print(f"🔍 DEBUG extract_metadata_fields input: {classification_data}")

    # The classification data comes nested under "classification" key
    classification = classification_data.get("classification", {}) if classification_data else {}

    # If the classification is stored as a string (JSON), parse it using the same logic as the table
    if isinstance(classification, str):
        try:
            # Use the same JSON cleaning logic as format_classification_table
            cleaned = _clean_json_response_metadata(classification)
            classification = json.loads(cleaned)
            print(f"🔍 DEBUG extract_metadata_fields: Successfully parsed JSON from string")
        except Exception as e:
            print(f"🔍 DEBUG extract_metadata_fields: JSON parsing failed: {e}")
            # Try to extract from raw text
            return _extract_from_raw_text(classification)

    # If classification is still nested under another "classification" key (from LLM response)
    if isinstance(classification, dict) and "classification" in classification:
        classification = classification["classification"]

    # Handle the case where JSON parsing failed and we only have raw_analysis
    if isinstance(classification, dict) and "raw_analysis" in classification and len(classification) == 1:
        # Try to extract metadata from the raw text analysis
        # NOTE(review): assumes raw_analysis is a string (it is sliced below) — confirm upstream.
        raw_text = classification["raw_analysis"]
        print(f"🔍 DEBUG: Extracting from raw_analysis: {raw_text[:200]}...")
        return _extract_from_raw_text(raw_text)

    # Extract deed type (accepts either of the two key spellings seen in responses)
    deed_type = classification.get("deed_type") or classification.get("type") or "N/A"

    # Extract jurisdiction - handle both dict and string formats
    jurisdiction_value = classification.get("jurisdiction") or classification.get("jurisdiction_hint") or "N/A"
    if isinstance(jurisdiction_value, dict):
        # Format nested jurisdiction nicely
        country = jurisdiction_value.get("country", "")
        state = jurisdiction_value.get("state_province", "") or jurisdiction_value.get("state", "")
        if country and state:
            jurisdiction = f"{country}, {state}"
        elif country:
            jurisdiction = country
        else:
            # Unrecognized dict shape: fall back to pretty-printed JSON.
            jurisdiction = json.dumps(jurisdiction_value, indent=2)
    elif isinstance(jurisdiction_value, list):
        jurisdiction = json.dumps(jurisdiction_value, indent=2)
    else:
        jurisdiction = str(jurisdiction_value)

    # Extract parties information
    parties = classification.get("key_parties") or classification.get("parties") or {}
    if isinstance(parties, dict) and parties:
        # Format parties information nicely (only grantor/grantee names are
        # pulled out; anything else falls back to pretty-printed JSON).
        parts = []
        if "grantor" in parties:
            grantor = parties["grantor"]
            if isinstance(grantor, dict):
                name = grantor.get("name", "")
                if name:
                    parts.append(f"Grantor: {name}")
        if "grantee" in parties:
            grantee = parties["grantee"]
            if isinstance(grantee, dict):
                name = grantee.get("name", "")
                if name:
                    parts.append(f"Grantee: {name}")
        parties_str = "\n".join(parts) if parts else json.dumps(parties, indent=2)
    else:
        parties_str = "N/A"

    # Extract property description (first non-empty of the known key spellings)
    property_desc = (
        classification.get("property_description_and_location")
        or classification.get("property_description")
        or classification.get("property")
        or "N/A"
    )
    if isinstance(property_desc, dict):
        # Format property info nicely (Bangladesh-style location fields)
        parts = []
        if "district" in property_desc:
            parts.append(f"District: {property_desc['district']}")
        if "upazila_thana" in property_desc:
            parts.append(f"Area: {property_desc['upazila_thana']}")
        if "mouza" in property_desc:
            parts.append(f"Mouza: {property_desc['mouza']}")
        if "area" in property_desc:
            parts.append(f"Size: {property_desc['area']}")
        property_str = "\n".join(parts) if parts else json.dumps(property_desc, indent=2)
    elif isinstance(property_desc, list):
        property_str = json.dumps(property_desc, indent=2)
    else:
        property_str = str(property_desc)

    # Extract consideration/price
    consideration = classification.get("consideration_amount") or classification.get("consideration") or "N/A"

    result = {
        "deed_type": deed_type,
        "jurisdiction": jurisdiction,
        "parties": parties_str,
        "property": property_str,
        "consideration": str(consideration),
    }

    print(f"🔍 DEBUG extract_metadata_fields result: {result}")
    return result
|
| 632 |
+
|
| 633 |
+
|
| 634 |
+
def _extract_from_raw_text(raw_text: str) -> Dict[str, str]:
|
| 635 |
+
"""Extract metadata from raw text analysis when JSON parsing fails."""
|
| 636 |
+
print(f"🔍 DEBUG: Attempting to extract from raw text: {raw_text[:300]}...")
|
| 637 |
+
|
| 638 |
+
# Initialize with default values
|
| 639 |
+
result = {
|
| 640 |
+
"deed_type": "N/A",
|
| 641 |
+
"jurisdiction": "N/A",
|
| 642 |
+
"parties": "N/A",
|
| 643 |
+
"property": "N/A",
|
| 644 |
+
"consideration": "N/A"
|
| 645 |
+
}
|
| 646 |
+
|
| 647 |
+
# Try to extract deed type
|
| 648 |
+
deed_type_patterns = [
|
| 649 |
+
r"deed\s+type[:\-\s]+([\w\s]+?)(?:\n|$|;|,)",
|
| 650 |
+
r"type\s+of\s+deed[:\-\s]+([\w\s]+?)(?:\n|$|;|,)",
|
| 651 |
+
r"this\s+is\s+a[n]?\s+([\w\s]+?)\s+deed",
|
| 652 |
+
r"(sale|mortgage|lease|gift|warranty|quitclaim)\s+deed"
|
| 653 |
+
]
|
| 654 |
+
|
| 655 |
+
for pattern in deed_type_patterns:
|
| 656 |
+
match = re.search(pattern, raw_text, re.IGNORECASE)
|
| 657 |
+
if match:
|
| 658 |
+
result["deed_type"] = match.group(1).strip().title()
|
| 659 |
+
break
|
| 660 |
+
|
| 661 |
+
# Try to extract jurisdiction
|
| 662 |
+
jurisdiction_patterns = [
|
| 663 |
+
r"jurisdiction[:\-\s]+([\w\s,]+?)(?:\n|$)",
|
| 664 |
+
r"state[:\-\s]+([\w\s,]+?)(?:\n|$)",
|
| 665 |
+
r"country[:\-\s]+([\w\s,]+?)(?:\n|$)",
|
| 666 |
+
r"location[:\-\s]+([\w\s,]+?)(?:\n|$)"
|
| 667 |
+
]
|
| 668 |
+
|
| 669 |
+
for pattern in jurisdiction_patterns:
|
| 670 |
+
match = re.search(pattern, raw_text, re.IGNORECASE)
|
| 671 |
+
if match:
|
| 672 |
+
result["jurisdiction"] = match.group(1).strip()
|
| 673 |
+
break
|
| 674 |
+
|
| 675 |
+
# Try to extract parties
|
| 676 |
+
parties_patterns = [
|
| 677 |
+
r"grantor[:\-\s]+([\w\s,]+?)(?:,\s*resident|$|\n)",
|
| 678 |
+
r"grantee[:\-\s]+([\w\s,]+?)(?:,\s*resident|$|\n)",
|
| 679 |
+
r"seller[:\-\s]+([\w\s,]+?)(?:,\s*resident|$|\n)",
|
| 680 |
+
r"buyer[:\-\s]+([\w\s,]+?)(?:,\s*resident|$|\n)"
|
| 681 |
+
]
|
| 682 |
+
|
| 683 |
+
parties_found = []
|
| 684 |
+
for pattern in parties_patterns:
|
| 685 |
+
matches = re.finditer(pattern, raw_text, re.IGNORECASE)
|
| 686 |
+
for match in matches:
|
| 687 |
+
# Extract role and name
|
| 688 |
+
full_match = match.group(0).strip()
|
| 689 |
+
name = match.group(1).strip()
|
| 690 |
+
role = full_match.split(':')[0].strip().title()
|
| 691 |
+
party_info = f"{role}: {name}"
|
| 692 |
+
if party_info not in parties_found and name:
|
| 693 |
+
parties_found.append(party_info)
|
| 694 |
+
|
| 695 |
+
if parties_found:
|
| 696 |
+
result["parties"] = "\n".join(parties_found)
|
| 697 |
+
|
| 698 |
+
# Try to extract property info
|
| 699 |
+
property_patterns = [
|
| 700 |
+
r"property[:\-\s]+([\w\s,]+?)(?:\n|$)",
|
| 701 |
+
r"district[:\-\s]+([\w\s]+?)(?:\n|$)",
|
| 702 |
+
r"area[:\-\s]+([\d\.\s\w]+?)(?:\n|$)"
|
| 703 |
+
]
|
| 704 |
+
|
| 705 |
+
property_found = []
|
| 706 |
+
for pattern in property_patterns:
|
| 707 |
+
matches = re.finditer(pattern, raw_text, re.IGNORECASE)
|
| 708 |
+
for match in matches:
|
| 709 |
+
prop_info = match.group(0).strip()
|
| 710 |
+
if prop_info not in property_found:
|
| 711 |
+
property_found.append(prop_info)
|
| 712 |
+
|
| 713 |
+
if property_found:
|
| 714 |
+
result["property"] = "\n".join(property_found)
|
| 715 |
+
|
| 716 |
+
# Try to extract consideration/amount
|
| 717 |
+
consideration_patterns = [
|
| 718 |
+
r"consideration[:\-\s]+([\d,\.\s\w]+?)(?:\n|$)",
|
| 719 |
+
r"amount[:\-\s]+([\d,\.\s\w]+?)(?:\n|$)",
|
| 720 |
+
r"price[:\-\s]+([\d,\.\s\w]+?)(?:\n|$)",
|
| 721 |
+
r"(\d+[,\d]*\s*(?:taka|dollars?|usd|€|£|\$))"
|
| 722 |
+
]
|
| 723 |
+
|
| 724 |
+
for pattern in consideration_patterns:
|
| 725 |
+
match = re.search(pattern, raw_text, re.IGNORECASE)
|
| 726 |
+
if match:
|
| 727 |
+
result["consideration"] = match.group(1).strip()
|
| 728 |
+
break
|
| 729 |
+
|
| 730 |
+
print(f"🔍 DEBUG: Extracted from raw text: {result}")
|
| 731 |
+
return result
|
| 732 |
+
|
| 733 |
+
|
| 734 |
+
def _clean_json_response_metadata(response: str) -> str:
|
| 735 |
+
"""Clean JSON response for metadata extraction (same as main.py logic)."""
|
| 736 |
+
cleaned = response.strip()
|
| 737 |
+
|
| 738 |
+
# Remove code fences if present
|
| 739 |
+
if cleaned.startswith("```"):
|
| 740 |
+
lines = cleaned.split("\n")
|
| 741 |
+
lines = lines[1:] # Remove first line
|
| 742 |
+
if lines and lines[-1].strip() == "```":
|
| 743 |
+
lines = lines[:-1] # Remove last line
|
| 744 |
+
cleaned = "\n".join(lines).strip()
|
| 745 |
+
|
| 746 |
+
# Look for JSON object boundaries
|
| 747 |
+
start_idx = cleaned.find("{")
|
| 748 |
+
end_idx = cleaned.rfind("}") + 1
|
| 749 |
+
|
| 750 |
+
if start_idx != -1 and end_idx > start_idx:
|
| 751 |
+
cleaned = cleaned[start_idx:end_idx]
|
| 752 |
+
|
| 753 |
+
return cleaned.strip()
|
| 754 |
+
|
| 755 |
+
|
| 756 |
+
def build_report_pdf(report: Dict[str, Any]) -> str:
|
| 757 |
+
"""Create a professional PDF report for download."""
|
| 758 |
+
from reportlab.lib.pagesizes import letter
|
| 759 |
+
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
| 760 |
+
from reportlab.lib.units import inch
|
| 761 |
+
from reportlab.lib.enums import TA_LEFT, TA_CENTER
|
| 762 |
+
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
| 763 |
+
from reportlab.lib import colors
|
| 764 |
+
|
| 765 |
+
# Create temporary PDF file
|
| 766 |
+
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".pdf")
|
| 767 |
+
pdf_path = tmp.name
|
| 768 |
+
tmp.close()
|
| 769 |
+
|
| 770 |
+
# Create PDF document
|
| 771 |
+
doc = SimpleDocTemplate(pdf_path, pagesize=letter,
|
| 772 |
+
rightMargin=72, leftMargin=72,
|
| 773 |
+
topMargin=72, bottomMargin=18)
|
| 774 |
+
|
| 775 |
+
# Container for the 'Flowable' objects
|
| 776 |
+
elements = []
|
| 777 |
+
|
| 778 |
+
# Define styles
|
| 779 |
+
styles = getSampleStyleSheet()
|
| 780 |
+
title_style = ParagraphStyle(
|
| 781 |
+
'CustomTitle',
|
| 782 |
+
parent=styles['Heading1'],
|
| 783 |
+
fontSize=24,
|
| 784 |
+
textColor=colors.HexColor('#1f2937'),
|
| 785 |
+
spaceAfter=30,
|
| 786 |
+
alignment=TA_CENTER
|
| 787 |
+
)
|
| 788 |
+
heading_style = ParagraphStyle(
|
| 789 |
+
'CustomHeading',
|
| 790 |
+
parent=styles['Heading2'],
|
| 791 |
+
fontSize=16,
|
| 792 |
+
textColor=colors.HexColor('#374151'),
|
| 793 |
+
spaceAfter=12,
|
| 794 |
+
spaceBefore=12
|
| 795 |
+
)
|
| 796 |
+
normal_style = styles['BodyText']
|
| 797 |
+
|
| 798 |
+
# Title
|
| 799 |
+
elements.append(Paragraph("Legal Deed Analysis Report", title_style))
|
| 800 |
+
elements.append(Spacer(1, 0.2*inch))
|
| 801 |
+
|
| 802 |
+
# Disclaimer
|
| 803 |
+
disclaimer_text = """
|
| 804 |
+
<b>LEGAL DISCLAIMER:</b> This automated analysis is for informational purposes only
|
| 805 |
+
and does not constitute legal advice. Always consult with a qualified attorney licensed
|
| 806 |
+
in your jurisdiction before making decisions based on deed documents.
|
| 807 |
+
"""
|
| 808 |
+
elements.append(Paragraph(disclaimer_text, normal_style))
|
| 809 |
+
elements.append(Spacer(1, 0.3*inch))
|
| 810 |
+
|
| 811 |
+
# Deed Classification
|
| 812 |
+
elements.append(Paragraph("Deed Classification", heading_style))
|
| 813 |
+
classification_data = report.get("deed_classification", {})
|
| 814 |
+
if classification_data.get("success"):
|
| 815 |
+
classification = classification_data.get("classification", {})
|
| 816 |
+
|
| 817 |
+
# Extract key info
|
| 818 |
+
deed_type = classification.get("deed_type", "N/A")
|
| 819 |
+
jurisdiction = classification.get("jurisdiction", {})
|
| 820 |
+
if isinstance(jurisdiction, dict):
|
| 821 |
+
jurisdiction_str = f"{jurisdiction.get('country', 'N/A')}, {jurisdiction.get('state_province', 'N/A')}"
|
| 822 |
+
else:
|
| 823 |
+
jurisdiction_str = str(jurisdiction)
|
| 824 |
+
|
| 825 |
+
consideration = classification.get("consideration_amount", "N/A")
|
| 826 |
+
date_exec = classification.get("date_of_execution", "N/A")
|
| 827 |
+
|
| 828 |
+
# Create header style for bold headers
|
| 829 |
+
header_style = ParagraphStyle(
|
| 830 |
+
'TableHeader',
|
| 831 |
+
parent=normal_style,
|
| 832 |
+
fontName='Helvetica-Bold',
|
| 833 |
+
fontSize=11
|
| 834 |
+
)
|
| 835 |
+
|
| 836 |
+
classification_table_data = [
|
| 837 |
+
[Paragraph("Field", header_style), Paragraph("Value", header_style)],
|
| 838 |
+
["Deed Type", str(deed_type)],
|
| 839 |
+
["Jurisdiction", jurisdiction_str],
|
| 840 |
+
["Consideration", str(consideration)],
|
| 841 |
+
["Date of Execution", str(date_exec)]
|
| 842 |
+
]
|
| 843 |
+
|
| 844 |
+
classification_table = Table(classification_table_data, colWidths=[2*inch, 4*inch])
|
| 845 |
+
classification_table.setStyle(TableStyle([
|
| 846 |
+
('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#e5e7eb')),
|
| 847 |
+
('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
|
| 848 |
+
('ALIGN', (0, 0), (-1, -1), 'LEFT'),
|
| 849 |
+
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
|
| 850 |
+
('FONTSIZE', (0, 0), (-1, 0), 12),
|
| 851 |
+
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
|
| 852 |
+
('GRID', (0, 0), (-1, -1), 1, colors.grey),
|
| 853 |
+
('VALIGN', (0, 0), (-1, -1), 'TOP'),
|
| 854 |
+
]))
|
| 855 |
+
|
| 856 |
+
elements.append(classification_table)
|
| 857 |
+
else:
|
| 858 |
+
elements.append(Paragraph("Classification data not available", normal_style))
|
| 859 |
+
|
| 860 |
+
elements.append(Spacer(1, 0.3*inch))
|
| 861 |
+
|
| 862 |
+
# Risk Overview - Properly formatted
|
| 863 |
+
elements.append(Paragraph("Risk Analysis Overview", heading_style))
|
| 864 |
+
risk_data = report.get("risk_analysis", {})
|
| 865 |
+
if risk_data.get("success"):
|
| 866 |
+
overall_summary = risk_data.get("overall_summary", "No summary available")
|
| 867 |
+
|
| 868 |
+
# Parse and format the structured risk overview
|
| 869 |
+
lines = overall_summary.split('\n')
|
| 870 |
+
|
| 871 |
+
# Create styles for different sections
|
| 872 |
+
section_style = ParagraphStyle(
|
| 873 |
+
'SectionHeading',
|
| 874 |
+
parent=styles['Heading3'],
|
| 875 |
+
fontSize=12,
|
| 876 |
+
textColor=colors.HexColor('#374151'),
|
| 877 |
+
spaceAfter=6,
|
| 878 |
+
spaceBefore=10,
|
| 879 |
+
fontName='Helvetica-Bold'
|
| 880 |
+
)
|
| 881 |
+
|
| 882 |
+
bullet_style = ParagraphStyle(
|
| 883 |
+
'BulletText',
|
| 884 |
+
parent=normal_style,
|
| 885 |
+
fontSize=10,
|
| 886 |
+
leftIndent=20,
|
| 887 |
+
bulletIndent=10,
|
| 888 |
+
spaceAfter=4
|
| 889 |
+
)
|
| 890 |
+
|
| 891 |
+
i = 0
|
| 892 |
+
while i < len(lines):
|
| 893 |
+
line = lines[i].strip()
|
| 894 |
+
|
| 895 |
+
# Skip empty lines
|
| 896 |
+
if not line:
|
| 897 |
+
i += 1
|
| 898 |
+
continue
|
| 899 |
+
|
| 900 |
+
# Check for structured sections
|
| 901 |
+
if line.startswith("OVERALL RISK LEVEL:"):
|
| 902 |
+
# Extract and highlight risk level
|
| 903 |
+
risk_level_text = line.replace("OVERALL RISK LEVEL:", "").strip()
|
| 904 |
+
if "HIGH" in risk_level_text.upper():
|
| 905 |
+
risk_color = colors.red
|
| 906 |
+
elif "MEDIUM" in risk_level_text.upper():
|
| 907 |
+
risk_color = colors.orange
|
| 908 |
+
else:
|
| 909 |
+
risk_color = colors.green
|
| 910 |
+
|
| 911 |
+
elements.append(Paragraph(
|
| 912 |
+
f"<b>OVERALL RISK LEVEL:</b> <font color='{risk_color.hexval()}'><b>{risk_level_text}</b></font>",
|
| 913 |
+
section_style
|
| 914 |
+
))
|
| 915 |
+
|
| 916 |
+
elif line.startswith("KEY FINDINGS:"):
|
| 917 |
+
elements.append(Paragraph("<b>Key Findings:</b>", section_style))
|
| 918 |
+
# Collect bullet points
|
| 919 |
+
i += 1
|
| 920 |
+
while i < len(lines) and lines[i].strip().startswith('-'):
|
| 921 |
+
bullet_text = lines[i].strip()[1:].strip() # Remove '-' and whitespace
|
| 922 |
+
elements.append(Paragraph(f"• {bullet_text}", bullet_style))
|
| 923 |
+
i += 1
|
| 924 |
+
i -= 1 # Back up one since we'll increment at the end of loop
|
| 925 |
+
|
| 926 |
+
elif line.startswith("RISK CATEGORIES FOUND:"):
|
| 927 |
+
categories = line.replace("RISK CATEGORIES FOUND:", "").strip()
|
| 928 |
+
elements.append(Paragraph(f"<b>Risk Categories Found:</b> {categories}", section_style))
|
| 929 |
+
|
| 930 |
+
elif line.startswith("RECOMMENDATIONS:"):
|
| 931 |
+
elements.append(Paragraph("<b>Recommendations:</b>", section_style))
|
| 932 |
+
# Collect bullet points
|
| 933 |
+
i += 1
|
| 934 |
+
while i < len(lines) and lines[i].strip().startswith('-'):
|
| 935 |
+
bullet_text = lines[i].strip()[1:].strip()
|
| 936 |
+
elements.append(Paragraph(f"• {bullet_text}", bullet_style))
|
| 937 |
+
i += 1
|
| 938 |
+
i -= 1
|
| 939 |
+
|
| 940 |
+
elif line.startswith("DISCLAIMER:"):
|
| 941 |
+
disclaimer_text = line.replace("DISCLAIMER:", "").strip()
|
| 942 |
+
# Collect any continuation lines
|
| 943 |
+
full_disclaimer = [disclaimer_text]
|
| 944 |
+
i += 1
|
| 945 |
+
while i < len(lines) and lines[i].strip() and not any(lines[i].strip().startswith(s) for s in ["OVERALL", "KEY", "RISK", "RECOMMENDATIONS"]):
|
| 946 |
+
full_disclaimer.append(lines[i].strip())
|
| 947 |
+
i += 1
|
| 948 |
+
i -= 1
|
| 949 |
+
|
| 950 |
+
disclaimer_style = ParagraphStyle(
|
| 951 |
+
'Disclaimer',
|
| 952 |
+
parent=normal_style,
|
| 953 |
+
fontSize=9,
|
| 954 |
+
textColor=colors.HexColor('#6b7280'),
|
| 955 |
+
fontName='Helvetica-Oblique',
|
| 956 |
+
spaceAfter=10,
|
| 957 |
+
spaceBefore=10
|
| 958 |
+
)
|
| 959 |
+
elements.append(Paragraph(f"<b>Disclaimer:</b> {' '.join(full_disclaimer)}", disclaimer_style))
|
| 960 |
+
|
| 961 |
+
else:
|
| 962 |
+
# Regular paragraph
|
| 963 |
+
if line:
|
| 964 |
+
elements.append(Paragraph(line, normal_style))
|
| 965 |
+
|
| 966 |
+
i += 1
|
| 967 |
+
else:
|
| 968 |
+
elements.append(Paragraph("Risk analysis not available", normal_style))
|
| 969 |
+
|
| 970 |
+
elements.append(Spacer(1, 0.3*inch))
|
| 971 |
+
|
| 972 |
+
# Clause Breakdown (first 15)
|
| 973 |
+
elements.append(Paragraph("Clause Breakdown (Summary)", heading_style))
|
| 974 |
+
clauses = report.get("clause_breakdown", {}).get("clauses", [])
|
| 975 |
+
if clauses:
|
| 976 |
+
# Create small font style for table cells
|
| 977 |
+
small_style = ParagraphStyle(
|
| 978 |
+
'SmallText',
|
| 979 |
+
parent=normal_style,
|
| 980 |
+
fontSize=8,
|
| 981 |
+
leading=10
|
| 982 |
+
)
|
| 983 |
+
|
| 984 |
+
clause_table_data = [[Paragraph("<b>ID</b>", normal_style),
|
| 985 |
+
Paragraph("<b>Type</b>", normal_style),
|
| 986 |
+
Paragraph("<b>Preview</b>", normal_style)]]
|
| 987 |
+
|
| 988 |
+
for clause in clauses[:15]:
|
| 989 |
+
clause_id = clause.get('id', '')
|
| 990 |
+
clause_type = clause.get('type', 'General')
|
| 991 |
+
clause_text = clause.get('text', '') # NO TRUNCATION - full text
|
| 992 |
+
|
| 993 |
+
# Use Paragraph objects for proper wrapping
|
| 994 |
+
clause_table_data.append([
|
| 995 |
+
Paragraph(clause_id, small_style),
|
| 996 |
+
Paragraph(clause_type, small_style),
|
| 997 |
+
Paragraph(clause_text, small_style) # Full text with wrapping
|
| 998 |
+
])
|
| 999 |
+
|
| 1000 |
+
clause_table = Table(clause_table_data, colWidths=[0.6*inch, 1.2*inch, 4.2*inch])
|
| 1001 |
+
clause_table.setStyle(TableStyle([
|
| 1002 |
+
('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#e5e7eb')),
|
| 1003 |
+
('TEXTCOLOR', (0, 0), (-1, 0), colors.black),
|
| 1004 |
+
('ALIGN', (0, 0), (-1, -1), 'LEFT'),
|
| 1005 |
+
('VALIGN', (0, 0), (-1, -1), 'TOP'),
|
| 1006 |
+
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
|
| 1007 |
+
('FONTSIZE', (0, 0), (-1, 0), 10),
|
| 1008 |
+
('BOTTOMPADDING', (0, 0), (-1, 0), 8),
|
| 1009 |
+
('TOPPADDING', (0, 1), (-1, -1), 6),
|
| 1010 |
+
('BOTTOMPADDING', (0, 1), (-1, -1), 6),
|
| 1011 |
+
('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
|
| 1012 |
+
('WORDWRAP', (0, 0), (-1, -1), True), # Enable word wrapping
|
| 1013 |
+
]))
|
| 1014 |
+
|
| 1015 |
+
elements.append(clause_table)
|
| 1016 |
+
else:
|
| 1017 |
+
elements.append(Paragraph("No clauses detected", normal_style))
|
| 1018 |
+
|
| 1019 |
+
elements.append(Spacer(1, 0.3*inch))
|
| 1020 |
+
|
| 1021 |
+
# Page break before detailed risk analysis
|
| 1022 |
+
elements.append(PageBreak())
|
| 1023 |
+
|
| 1024 |
+
# Detailed Risk Analysis
|
| 1025 |
+
elements.append(Paragraph("Detailed Risk Analysis by Clause", heading_style))
|
| 1026 |
+
clause_risks = risk_data.get("clause_risks", [])
|
| 1027 |
+
if clause_risks:
|
| 1028 |
+
for idx, risk in enumerate(clause_risks[:10], 1): # Limit to first 10
|
| 1029 |
+
clause_id = risk.get("clause_id", "Unknown")
|
| 1030 |
+
clause_type = risk.get("clause_type", "General")
|
| 1031 |
+
risk_analysis = risk.get("risk_analysis", "No analysis available")
|
| 1032 |
+
|
| 1033 |
+
# Extract risk level
|
| 1034 |
+
risk_level = "UNKNOWN"
|
| 1035 |
+
if "RISK LEVEL: HIGH" in risk_analysis.upper():
|
| 1036 |
+
risk_level = "HIGH"
|
| 1037 |
+
risk_color = colors.red
|
| 1038 |
+
elif "RISK LEVEL: MEDIUM" in risk_analysis.upper():
|
| 1039 |
+
risk_level = "MEDIUM"
|
| 1040 |
+
risk_color = colors.orange
|
| 1041 |
+
elif "RISK LEVEL: LOW" in risk_analysis.upper():
|
| 1042 |
+
risk_level = "LOW"
|
| 1043 |
+
risk_color = colors.green
|
| 1044 |
+
else:
|
| 1045 |
+
risk_color = colors.grey
|
| 1046 |
+
|
| 1047 |
+
# Create risk header
|
| 1048 |
+
risk_header = f"<b>Clause {clause_id}</b> ({clause_type}) - <font color='{risk_color.hexval()}'>Risk: {risk_level}</font>"
|
| 1049 |
+
elements.append(Paragraph(risk_header, normal_style))
|
| 1050 |
+
elements.append(Spacer(1, 0.1*inch))
|
| 1051 |
+
|
| 1052 |
+
# Add full risk analysis with proper text wrapping (no truncation)
|
| 1053 |
+
# Split into sections for better formatting
|
| 1054 |
+
sections = risk_analysis.split('\n')
|
| 1055 |
+
for section in sections[:10]: # Limit to first 10 sections to prevent overly long output
|
| 1056 |
+
if section.strip():
|
| 1057 |
+
# Use Paragraphs for proper text wrapping
|
| 1058 |
+
elements.append(Paragraph(section.strip(), normal_style))
|
| 1059 |
+
|
| 1060 |
+
elements.append(Spacer(1, 0.3*inch))
|
| 1061 |
+
else:
|
| 1062 |
+
elements.append(Paragraph("No detailed risk analysis available", normal_style))
|
| 1063 |
+
|
| 1064 |
+
# Build PDF
|
| 1065 |
+
doc.build(elements)
|
| 1066 |
+
|
| 1067 |
+
return pdf_path
|
| 1068 |
+
|
| 1069 |
+
|
| 1070 |
+
|
| 1071 |
+
@app.post("/analyze-deed", response_model=Dict[str, Any])
|
| 1072 |
+
async def analyze_deed_endpoint(file: UploadFile = File(...)):
|
| 1073 |
+
"""FastAPI endpoint for deed analysis."""
|
| 1074 |
+
try:
|
| 1075 |
+
# Validate file type
|
| 1076 |
+
if not file.content_type or not file.content_type == "application/pdf":
|
| 1077 |
+
raise HTTPException(status_code=400, detail="Only PDF files are supported")
|
| 1078 |
+
|
| 1079 |
+
# Read file content
|
| 1080 |
+
content = await file.read()
|
| 1081 |
+
|
| 1082 |
+
# Save temporarily and process
|
| 1083 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
|
| 1084 |
+
tmp_file.write(content)
|
| 1085 |
+
tmp_path = tmp_file.name
|
| 1086 |
+
|
| 1087 |
+
try:
|
| 1088 |
+
# Generate comprehensive report
|
| 1089 |
+
report_result = await generate_comprehensive_deed_report(tmp_path)
|
| 1090 |
+
report_data = json.loads(report_result)
|
| 1091 |
+
|
| 1092 |
+
return {
|
| 1093 |
+
"success": report_data.get("success", False),
|
| 1094 |
+
"filename": file.filename,
|
| 1095 |
+
"file_size": len(content),
|
| 1096 |
+
"report": report_data
|
| 1097 |
+
}
|
| 1098 |
+
finally:
|
| 1099 |
+
os.unlink(tmp_path)
|
| 1100 |
+
|
| 1101 |
+
except HTTPException:
|
| 1102 |
+
raise
|
| 1103 |
+
except Exception as e:
|
| 1104 |
+
raise HTTPException(status_code=500, detail=f"Analysis failed: {str(e)}")
|
| 1105 |
+
|
| 1106 |
+
|
| 1107 |
+
# Create Gradio interface (Gradio 6 compatible)
# Note: Theme will be applied in the .launch() method below
# Layout: disclaimer banner, then a two-column row (upload/stats/metadata
# on the left, reasoning trace + tabbed results on the right), then usage
# instructions. The component variables defined here are wired to the
# event handlers further down in this `with` block.
with gr.Blocks() as gradio_app:

    # Header
    gr.Markdown("# ⚖️ Legal Deed Review System")
    gr.Markdown("Upload a PDF deed document to receive comprehensive legal risk analysis.")

    # Legal disclaimer with custom styling using HTML - formatted with checkmarks
    gr.HTML("""
    <div style="
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        border: 2px solid #5a67d8;
        border-radius: 10px;
        padding: 20px;
        margin: 20px 0;
        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.3);
    ">
        <h3 style="color: #fff; margin-top: 0; margin-bottom: 15px; display: flex; align-items: center;">
            ⚖️ LEGAL DISCLAIMER
        </h3>
        <div style="color: #e6e6ff; line-height: 1.8; font-size: 14px;">
            <p style="margin-bottom: 12px; font-weight: 500;">This is an automated analysis tool for informational purposes only.</p>
            <ul style="list-style: none; padding-left: 0; margin: 10px 0;">
                <li style="margin-bottom: 8px;">✅ This does NOT constitute legal advice</li>
                <li style="margin-bottom: 8px;">✅ This does NOT replace consultation with a qualified attorney</li>
                <li style="margin-bottom: 8px;">✅ This analysis may NOT identify all potential legal issues</li>
                <li style="margin-bottom: 8px;">✅ Always have deeds reviewed by a licensed attorney before taking action</li>
                <li style="margin-bottom: 8px;">✅ Consult local legal professionals familiar with your jurisdiction</li>
            </ul>
            <p style="margin-top: 15px; font-weight: 500;">By using this tool, you acknowledge these limitations.</p>
        </div>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## 📄 Upload & Stats")
            # type="binary" means event handlers receive raw bytes
            # (update_stats/analyze_deed_gradio also tolerate paths and
            # file-like objects for robustness across Gradio versions).
            pdf_input = gr.File(
                label="Upload PDF Deed",
                file_types=[".pdf"],
                type="binary"
            )

            analyze_button = gr.Button(
                "🔍 Analyze Deed",
                variant="primary",
                size="lg",
                elem_id="analyze-btn"
            )

            # Add custom CSS for purple button
            gr.HTML("""
            <style>
            #analyze-btn {
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
                border: none !important;
                color: white !important;
                font-weight: 600 !important;
                box-shadow: 0 4px 6px rgba(102, 126, 234, 0.4) !important;
                transition: all 0.3s ease !important;
            }
            #analyze-btn:hover {
                background: linear-gradient(135deg, #5a67d8 0%, #6a3f8f 100%) !important;
                box-shadow: 0 6px 12px rgba(102, 126, 234, 0.6) !important;
                transform: translateY(-2px) !important;
            }
            </style>
            """)

            # Add CSS for purple tab indicators
            gr.HTML("""
            <style>
            /* Force blue color for all tab interactions */
            .tabs > button.selected,
            .tab-nav > button.selected,
            button[role="tab"][aria-selected="true"],
            button[role="tab"].selected {
                color: #667eea !important;
                border-bottom-color: #667eea !important;
            }

            .tabs > button:hover,
            .tab-nav > button:hover,
            button[role="tab"]:hover {
                color: #667eea !important;
                border-bottom-color: #667eea !important;
            }

            /* Target the specific orange underline if present */
            .tabs > button.selected::after,
            button[role="tab"][aria-selected="true"]::after {
                background-color: #667eea !important;
            }

            /* Custom Loading Animation - Blue Pulse */
            .generating {
                border-color: #667eea !important;
            }

            /* Override default orange spinner/loader */
            .loader {
                --loader-color: #667eea !important;
                border-top-color: #667eea !important;
                border-left-color: #667eea !important;
            }

            /* Add a subtle blue glow to active processing elements */
            .generating::before {
                background: linear-gradient(90deg, transparent, rgba(102, 126, 234, 0.2), transparent) !important;
            }
            </style>
            """)

            gr.Markdown("### 📊 Quick Stats")
            stats_display = gr.Markdown(
                value="Upload a deed to see document statistics...",
                elem_id="stats"
            )

            # Read-only metadata fields, filled progressively by the
            # streaming analyze_deed_gradio generator.
            gr.Markdown("### 🧭 Deed Metadata")
            deed_type_box = gr.Textbox(label="Deed Type", interactive=False)
            jurisdiction_box = gr.Textbox(label="Jurisdiction", interactive=False)
            consideration_box = gr.Textbox(label="Consideration / Price", interactive=False)
            parties_box = gr.Textbox(label="Parties", lines=6, interactive=False)
            property_box = gr.Textbox(label="Property Description", lines=4, interactive=False)

        with gr.Column(scale=2):
            # Agent Internal Monologue (Terminal View)
            agent_monologue = gr.Code(
                label=" Reasoning Trace ",
                language="shell",
                interactive=False,
                elem_id="agent-terminal",
                lines=12,
                value="⚡ Waiting for document upload......"
            )

            gr.Markdown("## 📋 Analysis Results")

            with gr.Tabs():
                with gr.TabItem("📝 Overview"):
                    classification_output = gr.DataFrame(
                        headers=["field", "value"],
                        label="Deed Classification (Table)",
                        interactive=False,
                        datatype=["str", "str"],
                        col_count=(2, "fixed"),
                        row_count=(0, "dynamic")
                    )
                    risk_overview_output = gr.Markdown(
                        value="Risk overview will appear here after analysis.",
                        label="Risk Overview"
                    )
                    report_file = gr.File(label="Download Report", interactive=False)

                with gr.TabItem("✂️ Clause Breakdown"):
                    clause_table = gr.DataFrame(
                        headers=["id", "type", "words", "preview"],
                        label="Clauses",
                        interactive=False,
                        wrap=True,
                        column_widths=["10%", "20%", "10%", "60%"] # ← Constrain preview to 60% width
                    )

                with gr.TabItem("⚠️ Risk Analysis"):
                    risk_table = gr.DataFrame(
                        headers=["clause_id", "clause_type", "risk_level", "summary"],
                        label="Clause Risks",
                        interactive=False,
                        wrap=True,
                        column_widths=["10%", "20%", "10%", "60%"]
                    )

                with gr.TabItem("📄 Extracted Text"):
                    text_output = gr.Textbox(
                        value="Upload and analyze a deed to see extracted text...",
                        label="OCR Text Extraction",
                        lines=30,
                        max_lines=None, # No limit - show full text
                        interactive=False,
                        autoscroll=False # Prevent auto-scrolling to bottom
                    )

                with gr.TabItem("🗃️ Raw JSON"):
                    json_output = gr.JSON(
                        label="Full Response",
                        value=None
                    )

    # Usage instructions
    gr.Markdown("""
    ## 🔧 How to Use

    1. **Upload** a PDF deed document using the file uploader
    2. **Click** the "Analyze Deed" button to start processing
    3. **Review** the results in the tabs:
       - **Classification:** Deed type, parties, and key information
       - **Clause Breakdown:** Identified legal clauses and sections
       - **Risk Analysis:** Potential legal risks and recommendations
       - **Extracted Text:** Raw text extracted from the PDF
    4. **Consult** a qualified attorney for legal advice based on the analysis

    ### ⚡ Processing Time
    - Analysis typically takes 30-60 seconds or more depending on document complexity
    - Multi-page deeds may take longer for OCR processing

    ### 📋 Supported Documents
    - Property sale deeds
    - Mortgage deeds
    - Lease agreements
    - Gift deeds
    - Warranty deeds
    - Quitclaim deeds
    """)
|
| 1322 |
+
|
| 1323 |
+
# Event handlers
|
| 1324 |
+
def update_stats(pdf_file):
|
| 1325 |
+
"""Update quick stats display."""
|
| 1326 |
+
if pdf_file is None:
|
| 1327 |
+
return "No document uploaded"
|
| 1328 |
+
|
| 1329 |
+
try:
|
| 1330 |
+
# Handle different Gradio file input formats
|
| 1331 |
+
if hasattr(pdf_file, 'read') and hasattr(pdf_file, 'seek'):
|
| 1332 |
+
# File-like object
|
| 1333 |
+
file_size = len(pdf_file.read())
|
| 1334 |
+
pdf_file.seek(0) # Reset file pointer
|
| 1335 |
+
elif isinstance(pdf_file, str):
|
| 1336 |
+
# File path string
|
| 1337 |
+
import os
|
| 1338 |
+
file_size = os.path.getsize(pdf_file)
|
| 1339 |
+
elif isinstance(pdf_file, bytes):
|
| 1340 |
+
# Raw bytes
|
| 1341 |
+
file_size = len(pdf_file)
|
| 1342 |
+
else:
|
| 1343 |
+
return f"📊 **Document Stats:**\n• File type: {type(pdf_file).__name__}\n• Status: Ready for analysis"
|
| 1344 |
+
|
| 1345 |
+
return f"""📊 **Document Stats:**
|
| 1346 |
+
• File size: {file_size:,} bytes
|
| 1347 |
+
• Status: Ready for analysis
|
| 1348 |
+
• Click 'Analyze Deed' to start processing"""
|
| 1349 |
+
except:
|
| 1350 |
+
return "Error reading document information"
|
| 1351 |
+
|
| 1352 |
+
def _write_report_file(report_data: Dict[str, Any]) -> Optional[str]:
|
| 1353 |
+
"""Generate and return PDF report file path."""
|
| 1354 |
+
try:
|
| 1355 |
+
return build_report_pdf(report_data)
|
| 1356 |
+
except Exception as e:
|
| 1357 |
+
print(f"Error generating PDF report: {e}")
|
| 1358 |
+
return None
|
| 1359 |
+
return None
|
| 1360 |
+
|
| 1361 |
+
    async def analyze_deed_gradio(pdf_file):
        """Main analysis function for Gradio interface with streaming logs.

        Async generator: each `yield` is a 13-tuple whose order must match
        the `outputs=` list of `analyze_button.click` below:
        (agent log, classification table, risk overview md, clause table,
        risk table, extracted text, raw JSON, deed type, jurisdiction,
        parties, property, consideration, report file path).
        Intermediate yields progressively fill components with placeholder
        "⏳" values while the pipeline runs.
        """
        import random  # NOTE(review): appears unused in this function — TODO confirm before removing
        import time

        # Initialize empty return values
        empty_table: List[Dict[str, Any]] = []
        loading_table = [{"field": "Status", "value": "⏳ Waiting to start..."}]
        empty_text = "No data"

        current_log = ""
        # Initial yield - System Boot
        yield (
            current_log,
            loading_table,
            "Risk overview will appear here after analysis.",
            empty_table,
            empty_table,
            empty_text,
            {},
            "⏳ Waiting...",
            "⏳ Waiting...",
            "⏳ Waiting...",
            "⏳ Waiting...",
            "⏳ Waiting...",
            None
        )

        # Guard: nothing uploaded — emit an error frame and stop streaming.
        if pdf_file is None:
            current_log += "\n❌ ERROR: No file uploaded."
            yield (
                current_log,
                empty_table,
                "❌ No file uploaded",
                empty_table,
                empty_table,
                empty_text,
                {},
                "N/A",
                "N/A",
                "N/A",
                "N/A",
                "N/A",
                None
            )
            return

        try:
            # --- STEP 1: Text Extraction ---
            current_log += "\n● Extracting information (Step 1).\n > Target Tool: 'ocr_image' matrix injection.\n > Task: Extracting text from PDF document..."
            yield (
                current_log,
                [{"field": "Status", "value": "⏳ Extracting text..."}],
                "⏳ Extracting text...",
                empty_table,
                empty_table,
                "⏳ Extracting text from document...",
                {},
                "⏳ Extracting...",
                "⏳ Extracting...",
                "⏳ Extracting...",
                "⏳ Extracting...",
                "⏳ Extracting...",
                None
            )

            # Handle file input: Gradio may deliver a file-like object, a
            # path string, or raw bytes; normalize all three to a temp PDF
            # on disk (delete=False — path is reused by the report builder).
            with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as tmp_file:
                if hasattr(pdf_file, 'read') and hasattr(pdf_file, 'seek'):
                    pdf_file.seek(0)
                    tmp_file.write(pdf_file.read())
                elif isinstance(pdf_file, str):
                    with open(pdf_file, 'rb') as f:
                        tmp_file.write(f.read())
                elif isinstance(pdf_file, bytes):
                    tmp_file.write(pdf_file)
                tmp_path = tmp_file.name

            # Call extraction tool (returns a JSON string)
            text_result = await extract_text_from_deed_pdf(tmp_path)
            text_data = json.loads(text_result)

            if not text_data["success"]:
                raise Exception(text_data.get("error", "Extraction failed"))

            deed_text = text_data["text"]
            current_log += f"\n > Status: {len(deed_text)} characters extracted.\n✅ RESULT: Step 1 Complete. Text extraction successful."
            yield (
                current_log,
                [{"field": "Status", "value": "⏳ Classifying..."}],
                "⏳ Classifying document...",
                empty_table,
                empty_table,
                deed_text, # Show text early
                {},
                "⏳ Classifying...",
                "⏳ Classifying...",
                "⏳ Classifying...",
                "⏳ Classifying...",
                "⏳ Classifying...",
                None
            )

            # --- STEP 2: Classification ---
            current_log += "\n\n● Executing Classification Layer (Step 2).\n > Target Tool: 'classify_deed_type'.\n > Analysis: Identifying document type and parties..."
            yield (
                current_log,
                [{"field": "Status", "value": "⏳ Identifying parties..."}],
                "⏳ Classifying document...",
                empty_table,
                empty_table,
                deed_text,
                {},
                "⏳ Identifying...",
                "⏳ Identifying...",
                "⏳ Identifying...",
                "⏳ Identifying...",
                "⏳ Identifying...",
                None
            )

            classification_result = await classify_deed_type(deed_text)
            classification_data = json.loads(classification_result)
            metadata = extract_metadata_fields(classification_data)

            current_log += f"\n✅ RESULT: Identified '{metadata['deed_type']}' (Jurisdiction: {metadata['jurisdiction']})."
            yield (
                current_log,
                format_classification_table(classification_data), # Show classification early
                "⏳ Analyzing risks...",
                empty_table,
                empty_table,
                deed_text,
                {},
                metadata["deed_type"],
                metadata["jurisdiction"],
                metadata["parties"],
                metadata["property"],
                metadata["consideration"],
                None
            )

            # --- STEP 3: Clause Splitting ---
            # We don't log this step explicitly to save space, or we can merge it with Risk Analysis
            clauses_result = await split_deed_into_clauses(deed_text)
            clauses_data = json.loads(clauses_result)

            # --- STEP 4: Risk Analysis ---
            current_log += "\n\n● Executing Risk Analysis Engine (Step 3).\n > Target Tool: 'analyze_deed_risks'.\n > Strategy: Cross-referencing clauses with risk database..."
            yield (
                current_log,
                format_classification_table(classification_data),
                "⏳ Analyzing risks...",
                format_clause_table(clauses_data), # Show clauses early
                empty_table,
                deed_text,
                {},
                metadata["deed_type"],
                metadata["jurisdiction"],
                metadata["parties"],
                metadata["property"],
                metadata["consideration"],
                None
            )

            risks_result = await analyze_deed_risks(
                clauses_result,
                json.dumps(classification_data.get("classification", {}))
            )
            risks_data = json.loads(risks_result)

            clause_count = len(clauses_data.get("clauses", []))
            current_log += f"\n > Found: {clause_count} clauses analyzed.\n✅ RESULT: Risk analysis complete. Report generated."

            # Compile final report data
            report_data = {
                "success": True,
                "pdf_path": tmp_path,
                "extraction_metadata": text_data.get("metadata", {}),
                "deed_classification": classification_data,
                "clause_breakdown": clauses_data,
                "risk_analysis": risks_data,
                "text_preview": deed_text,
                "report_metadata": {
                    "generated_at": time.time(),
                    "analysis_steps": ["text_extraction", "classification", "risk_analysis"],
                    "processing_method": "agentic_flow"
                }
            }

            # Generate PDF report (best-effort; None if generation fails)
            report_path = _write_report_file(report_data)

            # Final Yield
            yield (
                current_log,
                format_classification_table(classification_data),
                format_risk_overview(risks_data),
                format_clause_table(clauses_data),
                format_risk_table(risks_data),
                deed_text,
                report_data,
                metadata["deed_type"],
                metadata["jurisdiction"],
                metadata["parties"],
                metadata["property"],
                metadata["consideration"],
                report_path
            )

        except Exception as e:
            # Any pipeline failure produces a final error frame instead of
            # letting the exception escape the generator.
            error_msg = f"❌ Analysis failed: {str(e)}"
            current_log += f"\n❌ SYSTEM ERROR: {str(e)}"
            yield (
                current_log,
                empty_table,
                error_msg,
                empty_table,
                empty_table,
                error_msg,
                {},
                "❌ Error",
                "❌ Error",
                "❌ Error",
                "❌ Error",
                "❌ Error",
                None
            )
|
| 1589 |
+
|
| 1590 |
+
    # Connect event handlers
    # File selection refreshes the quick-stats panel immediately.
    pdf_input.change(
        fn=update_stats,
        inputs=[pdf_input],
        outputs=[stats_display]
    )

    # Analyze button drives the streaming generator; the 13 outputs listed
    # here must match, in order, the 13-tuples yielded by
    # analyze_deed_gradio.
    analyze_button.click(
        fn=analyze_deed_gradio,
        inputs=[pdf_input],
        outputs=[
            agent_monologue,
            classification_output,
            risk_overview_output,
            clause_table,
            risk_table,
            text_output,
            json_output,
            deed_type_box,
            jurisdiction_box,
            parties_box,
            property_box,
            consideration_box,
            report_file
        ]
    )
|
| 1616 |
+
|
| 1617 |
+
# Mount Gradio app to FastAPI
# Note: Gradio 6.0 does not support theme parameter in mount_gradio_app
# The app will use Gradio's default light theme
# The styled disclaimer box (HTML gradient) is still preserved for visual appeal
# Serves the Gradio UI at "/" while keeping the /analyze-deed API route.
app = gr.mount_gradio_app(app, gradio_app, path="/")
|
| 1622 |
+
|
| 1623 |
+
if __name__ == "__main__":
|
| 1624 |
+
import uvicorn
|
| 1625 |
+
print("🏛️ Starting Legal Deed Review Web Application...")
|
| 1626 |
+
print("📍 Server will be available at: http://localhost:8002")
|
| 1627 |
+
print("🔧 API endpoint: http://localhost:8002/analyze-deed")
|
| 1628 |
+
print("🌐 Gradio interface: http://localhost:8002/")
|
| 1629 |
+
print("\n⚖️ Legal Notice: This tool provides analysis only, not legal advice.")
|
| 1630 |
+
print("✋ Press Ctrl+C to stop the server")
|
| 1631 |
+
|
| 1632 |
+
uvicorn.run(app, host="0.0.0.0", port=8002, reload=True)
|