Spaces:
Runtime error
Runtime error
File size: 2,585 Bytes
57535ba 9663a4b 6bbf952 57535ba 6bbf952 57535ba 9663a4b 57535ba 6bbf952 57535ba 9663a4b 57535ba 9663a4b 6bbf952 57535ba 9663a4b 6bbf952 9663a4b 6bbf952 9663a4b 6bbf952 9663a4b 6bbf952 57535ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
from umap_reducer import UMAPReducer
from embeddings_encoder import EmbeddingsEncoder
from flask import Flask, request, render_template, jsonify, make_response, session
from flask_session import Session
from flask_cors import CORS, cross_origin
import os
from dotenv import load_dotenv
import feedparser
import json
from dateutil import parser
import re
import numpy as np
import gzip
import hashlib
# Read variables from a local .env file (if any) before os.environ is queried.
load_dotenv()

app = Flask(__name__, static_url_path='/static')

# Session settings are applied in one call. The cookie is flagged
# SameSite=None together with Secure, and the session type handed to
# flask_session is "filesystem".
app.config.update(
    SECRET_KEY=os.environ.get("SECRET_KEY"),
    SESSION_PERMANENT=True,
    SESSION_TYPE="filesystem",
    SESSION_COOKIE_SAMESITE="None",
    SESSION_COOKIE_SECURE=True,
)
Session(app)
CORS(app)

# Module-level singletons shared by every request handler.
reducer = UMAPReducer()
encoder = EmbeddingsEncoder()
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/run-umap', methods=['POST'])
@cross_origin(supports_credentials=True)
def run_umap():
    """Embed sentences, project them with UMAP, cluster, and reply with gzipped JSON.

    The request body must be JSON shaped like
    ``{"data": {"sentences": ..., "umap_options": ..., "cluster_options": ...}}``.
    Sentence embeddings are cached in the session under a hash of the
    sentence list so that re-running with different UMAP/cluster options
    does not re-encode the same input.

    Returns:
        On success, a gzip-compressed JSON document with the keys
        ``"embeddings"`` (the UMAP output) and ``"clusters"`` (the cluster
        labels). On a malformed payload or any processing failure, a JSON
        object ``{"error": <message>}`` with HTTP status 400.
    """
    # Payload extraction is guarded separately so that a missing or
    # mis-typed field yields a descriptive 400 rather than an unhandled 500.
    try:
        payload = request.get_json()['data']
        sentences = payload['sentences']
        umap_options = payload['umap_options']
        cluster_options = payload['cluster_options']
    except (KeyError, TypeError) as e:
        return jsonify({"error": "Malformed request body: " + repr(e)}), 400

    try:
        # create unique hash for input, avoid recalculating embeddings.
        # The list is serialised as JSON (not concatenated) so that sentence
        # boundaries are part of the key: ["ab", "c"] and ["a", "bc"] must
        # not share cached embeddings.
        sentences_input_hash = hashlib.sha256(
            json.dumps(sentences).encode("utf-8")).hexdigest()
        print("input options:", sentences_input_hash,
              umap_options, cluster_options, "\n\n")

        if not session.get(sentences_input_hash):
            print("New input, calculating embeddings" "\n\n")
            embeddings = encoder.encode(sentences)
            session[sentences_input_hash] = embeddings.tolist()
        else:
            print("Input already calculated, using cached embeddings", "\n\n")
            # NOTE(review): this branch hands the reducer a plain list while
            # the branch above hands it the encoder's native output —
            # confirm reducer.embed accepts both.
            embeddings = session[sentences_input_hash]

        # UMAP embeddings
        reducer.setParams(umap_options, cluster_options)
        umap_embeddings = reducer.embed(embeddings)

        # HDBScan cluster analysis
        clusters = reducer.clusterAnalysis(umap_embeddings)

        content = gzip.compress(json.dumps(
            {
                "embeddings": umap_embeddings.tolist(),
                "clusters": clusters.labels_.tolist()
            }
        ).encode('utf8'), 5)

        response = make_response(content)
        response.headers['Content-length'] = len(content)
        response.headers['Content-Encoding'] = 'gzip'
        # The body is JSON; without this the response would carry Flask's
        # default text/html content type.
        response.headers['Content-Type'] = 'application/json'
        return response
    except Exception as e:
        # Top-level boundary of the endpoint: report the failure to the
        # client as JSON instead of letting it surface as a 500.
        return jsonify({"error": str(e)}), 400
if __name__ == '__main__':
    # Bind to every interface; the port is taken from $PORT, else 7860.
    listen_port = int(os.environ.get('PORT', 7860))
    app.run(host='0.0.0.0', port=listen_port)
|