Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -121,13 +121,10 @@ def predict(file_obj):
|
|
121 |
try:
|
122 |
sequences = parse_fasta(text)
|
123 |
for header, seq in sequences:
|
124 |
-
# Get
|
125 |
-
kmer_vector = sequence_to_kmer_vector(seq)
|
126 |
-
kmer_vector = scaler.transform(kmer_vector.reshape(1, -1))
|
127 |
-
X_tensor = torch.FloatTensor(kmer_vector).to(device)
|
128 |
-
|
129 |
-
# Get raw frequency vector before scaling
|
130 |
raw_freq_vector = sequence_to_kmer_vector(seq)
|
|
|
|
|
131 |
|
132 |
# Get predictions and feature importance
|
133 |
with torch.no_grad():
|
@@ -138,17 +135,18 @@ def predict(file_obj):
|
|
138 |
importance = model.get_feature_importance(X_tensor)
|
139 |
kmer_importance = importance[0].cpu().numpy()
|
140 |
|
141 |
-
# Normalize importance scores to
|
142 |
kmer_importance = kmer_importance / np.max(np.abs(kmer_importance)) * 0.002
|
143 |
|
144 |
-
# Get top 10 k-mers
|
145 |
top_k = 10
|
146 |
top_indices = np.argsort(np.abs(kmer_importance))[-top_k:][::-1]
|
147 |
important_kmers = [
|
148 |
{
|
149 |
'kmer': list(kmer_dict.keys())[list(kmer_dict.values()).index(i)],
|
150 |
'importance': float(kmer_importance[i]),
|
151 |
-
'frequency': float(raw_freq_vector[i])
|
|
|
152 |
}
|
153 |
for i in top_indices
|
154 |
]
|
@@ -166,7 +164,7 @@ Non-human probability: {float(probs[0][0]):0.4f}
|
|
166 |
Most influential k-mers:"""
|
167 |
|
168 |
for kmer in important_kmers:
|
169 |
-
result += f"\n {kmer['kmer']}: importance={kmer['importance']:.4f},
|
170 |
|
171 |
results.append(result)
|
172 |
except Exception as e:
|
|
|
121 |
try:
|
122 |
sequences = parse_fasta(text)
|
123 |
for header, seq in sequences:
|
124 |
+
# Get raw frequency vector and scaled vector
|
|
|
|
|
|
|
|
|
|
|
125 |
raw_freq_vector = sequence_to_kmer_vector(seq)
|
126 |
+
kmer_vector = scaler.transform(raw_freq_vector.reshape(1, -1))
|
127 |
+
X_tensor = torch.FloatTensor(kmer_vector).to(device)
|
128 |
|
129 |
# Get predictions and feature importance
|
130 |
with torch.no_grad():
|
|
|
135 |
importance = model.get_feature_importance(X_tensor)
|
136 |
kmer_importance = importance[0].cpu().numpy()
|
137 |
|
138 |
+
# Normalize importance scores to original scale
|
139 |
kmer_importance = kmer_importance / np.max(np.abs(kmer_importance)) * 0.002
|
140 |
|
141 |
+
# Get top 10 k-mers based on absolute importance
|
142 |
top_k = 10
|
143 |
top_indices = np.argsort(np.abs(kmer_importance))[-top_k:][::-1]
|
144 |
important_kmers = [
|
145 |
{
|
146 |
'kmer': list(kmer_dict.keys())[list(kmer_dict.values()).index(i)],
|
147 |
'importance': float(kmer_importance[i]),
|
148 |
+
'frequency': float(raw_freq_vector[i]),
|
149 |
+
'scaled': float(kmer_vector[0][i])
|
150 |
}
|
151 |
for i in top_indices
|
152 |
]
|
|
|
164 |
Most influential k-mers:"""
|
165 |
|
166 |
for kmer in important_kmers:
|
167 |
+
result += f"\n {kmer['kmer']}: importance={kmer['importance']:.4f}, frequency={kmer['frequency']:.4f} ({kmer['frequency']*100:.2f}%), scaled={kmer['scaled']:.4f}"
|
168 |
|
169 |
results.append(result)
|
170 |
except Exception as e:
|