hiyata committed on
Commit
d2177a6
·
verified ·
1 Parent(s): 233c5f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -10
app.py CHANGED
@@ -121,13 +121,10 @@ def predict(file_obj):
121
  try:
122
  sequences = parse_fasta(text)
123
  for header, seq in sequences:
124
- # Get k-mer vector
125
- kmer_vector = sequence_to_kmer_vector(seq)
126
- kmer_vector = scaler.transform(kmer_vector.reshape(1, -1))
127
- X_tensor = torch.FloatTensor(kmer_vector).to(device)
128
-
129
- # Get raw frequency vector before scaling
130
  raw_freq_vector = sequence_to_kmer_vector(seq)
 
 
131
 
132
  # Get predictions and feature importance
133
  with torch.no_grad():
@@ -138,17 +135,18 @@ def predict(file_obj):
138
  importance = model.get_feature_importance(X_tensor)
139
  kmer_importance = importance[0].cpu().numpy()
140
 
141
- # Normalize importance scores to match original scale
142
  kmer_importance = kmer_importance / np.max(np.abs(kmer_importance)) * 0.002
143
 
144
- # Get top 10 k-mers
145
  top_k = 10
146
  top_indices = np.argsort(np.abs(kmer_importance))[-top_k:][::-1]
147
  important_kmers = [
148
  {
149
  'kmer': list(kmer_dict.keys())[list(kmer_dict.values()).index(i)],
150
  'importance': float(kmer_importance[i]),
151
- 'frequency': float(raw_freq_vector[i])
 
152
  }
153
  for i in top_indices
154
  ]
@@ -166,7 +164,7 @@ Non-human probability: {float(probs[0][0]):0.4f}
166
  Most influential k-mers:"""
167
 
168
  for kmer in important_kmers:
169
- result += f"\n {kmer['kmer']}: importance={kmer['importance']:.4f}, raw_freq={kmer['raw_freq']:.4f} ({kmer['raw_freq']*100:.2f}%), scaled_freq={kmer['scaled_freq']:.4f}"
170
 
171
  results.append(result)
172
  except Exception as e:
 
121
  try:
122
  sequences = parse_fasta(text)
123
  for header, seq in sequences:
124
+ # Get raw frequency vector and scaled vector
 
 
 
 
 
125
  raw_freq_vector = sequence_to_kmer_vector(seq)
126
+ kmer_vector = scaler.transform(raw_freq_vector.reshape(1, -1))
127
+ X_tensor = torch.FloatTensor(kmer_vector).to(device)
128
 
129
  # Get predictions and feature importance
130
  with torch.no_grad():
 
135
  importance = model.get_feature_importance(X_tensor)
136
  kmer_importance = importance[0].cpu().numpy()
137
 
138
+ # Normalize importance scores to original scale
139
  kmer_importance = kmer_importance / np.max(np.abs(kmer_importance)) * 0.002
140
 
141
+ # Get top 10 k-mers based on absolute importance
142
  top_k = 10
143
  top_indices = np.argsort(np.abs(kmer_importance))[-top_k:][::-1]
144
  important_kmers = [
145
  {
146
  'kmer': list(kmer_dict.keys())[list(kmer_dict.values()).index(i)],
147
  'importance': float(kmer_importance[i]),
148
+ 'frequency': float(raw_freq_vector[i]),
149
+ 'scaled': float(kmer_vector[0][i])
150
  }
151
  for i in top_indices
152
  ]
 
164
  Most influential k-mers:"""
165
 
166
  for kmer in important_kmers:
167
+ result += f"\n {kmer['kmer']}: importance={kmer['importance']:.4f}, frequency={kmer['frequency']:.4f} ({kmer['frequency']*100:.2f}%), scaled={kmer['scaled']:.4f}"
168
 
169
  results.append(result)
170
  except Exception as e: