charlieoneill
/

my_modernbert_person_embeddings

Feature Extraction

Model card Files Files and versions Community

charlieoneill committed on 18 days ago

Commit

3d541d7

·

verified ·

1 Parent(s): 0456053

Update pipeline.py

Files changed (1) hide show

pipeline.py +20 -1

pipeline.py CHANGED Viewed

@@ -1,8 +1,27 @@
 from transformers import AutoTokenizer, AutoModel
 import torch
 from typing import List
-from model import PersonEmbeddings
 class CustomEmbeddingPipeline:
     def __init__(self, model_id="answerdotai/ModernBERT-base"):

 from transformers import AutoTokenizer, AutoModel
 import torch
 from typing import List
+import torch.nn as nn
+class PersonEmbeddings(nn.Module):  # pretrained base encoder followed by an MLP projection head; yields one vector per input sequence
+    def __init__(self, model_id: str):  # model_id is forwarded unchanged to AutoModel.from_pretrained
+        super().__init__()
+        self.base_model = AutoModel.from_pretrained(model_id)  # loads the base encoder weights for model_id
+        self.projection = nn.Sequential(  # projection head: Linear 768 -> 1024, ReLU, Linear 1024 -> 1536
+            nn.Linear(768, 1024),  # input width is hard-coded to 768; assumes the base model's hidden size is 768 — TODO confirm for other model_id values
+            nn.ReLU(),
+            nn.Linear(1024, 1536)  # final embedding width is 1536
+        )
+    def forward(self, input_ids, attention_mask):  # both arguments are passed straight to the base model; returns the projected embeddings
+        outputs = self.base_model(
+            input_ids=input_ids,
+            attention_mask=attention_mask
+        )
+        last_hidden = outputs.last_hidden_state  # (B, seq_len, 768)
+        mean_pooled = last_hidden.mean(dim=1)    # (B, 768); NOTE(review): unweighted mean over dim 1 does not use attention_mask, so padded positions are averaged in — confirm this is intended
+        embeddings = self.projection(mean_pooled)  # (B, 1536)
+        return embeddings
 class CustomEmbeddingPipeline:
     def __init__(self, model_id="answerdotai/ModernBERT-base"):