Spaces:
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -12,7 +12,7 @@ from nltk.tokenize import sent_tokenize
|
|
| 12 |
from transformers import MarianMTModel, MarianTokenizer
|
| 13 |
|
| 14 |
API_KEY = os.environ.get("API_KEY")
|
| 15 |
-
VALID_IMAGE_EXTENSIONS = {"
|
| 16 |
|
| 17 |
app = FastAPI()
|
| 18 |
# CORS issue write below code
|
|
@@ -43,13 +43,11 @@ async def ocr(
|
|
| 43 |
try:
|
| 44 |
|
| 45 |
# # Check if the file format is allowed
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
|
| 50 |
content = await image.read()
|
| 51 |
-
print("[filename]", image.filename.split("."))
|
| 52 |
-
# print("[image extension]", image.filename.split(".").len)
|
| 53 |
image = Image.open(BytesIO(content))
|
| 54 |
text = pytesseract.image_to_string(image, lang = 'eng')
|
| 55 |
# text = pytesseract.image_to_string(image, lang="+".join(languages))
|
|
|
|
| 12 |
from transformers import MarianMTModel, MarianTokenizer
|
| 13 |
|
| 14 |
API_KEY = os.environ.get("API_KEY")
|
| 15 |
+
VALID_IMAGE_EXTENSIONS = {"jpg", "jpeg", "png"}
|
| 16 |
|
| 17 |
app = FastAPI()
|
| 18 |
# CORS issue write below code
|
|
|
|
| 43 |
try:
|
| 44 |
|
| 45 |
# # Check if the file format is allowed
|
| 46 |
+
file_extension = image.filename.split(".")[-1].lower()
|
| 47 |
+
if file_extension not in VALID_IMAGE_EXTENSIONS:
|
| 48 |
+
raise HTTPException(status_code=400, detail="Invalid file format. Only .jpg, .jpeg, and .png are allowed.")
|
| 49 |
|
| 50 |
content = await image.read()
|
|
|
|
|
|
|
| 51 |
image = Image.open(BytesIO(content))
|
| 52 |
text = pytesseract.image_to_string(image, lang = 'eng')
|
| 53 |
# text = pytesseract.image_to_string(image, lang="+".join(languages))
|