Spaces:

ai4privacy
/

general-english-anonymiser-openpii-500k

Running

App Files Files Community

MikeDoes committed on Mar 21

Commit

95a61ac

verified ·

1 Parent(s): 0315cc8

Upload 2 files

Browse files

Files changed (2) hide show

ai4privacy-logo.png +0 -0
index.html +347 -19

ai4privacy-logo.png ADDED Viewed

index.html CHANGED Viewed

@@ -1,19 +1,347 @@
-<!doctype html>
-<html>
-	<head>
-		<meta charset="utf-8" />
-		<meta name="viewport" content="width=device-width" />
-		<title>My static Space</title>
-		<link rel="stylesheet" href="style.css" />
-	</head>
-	<body>
-		<div class="card">
-			<h1>Welcome to your static Space!</h1>
-			<p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
-			<p>
-				Also don't forget to check the
-				<a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
-			</p>
-		</div>
-	</body>
-</html>

+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>Short Text and Open Source: Anonymiser</title>
+  <script src="https://cdn.tailwindcss.com"></script>
+  <script src="https://cdnjs.cloudflare.com/ajax/libs/iconify/2.0.0/iconify.min.js"></script>
+  <style>
+    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;700&display=swap');
+    * {
+      font-family: 'Inter', sans-serif;
+    }
+    textarea, #privacyMask {
+      transition: all 0.2s ease-in-out;
+    }
+    ::-webkit-scrollbar {
+      width: 6px;
+    }
+    ::-webkit-scrollbar-track {
+      background: #2d2d2d;
+    }
+    ::-webkit-scrollbar-thumb {
+      background: #4a4a4a;
+      border-radius: 3px;
+    }
+    .entity-tile {
+      transition: transform 0.2s, box-shadow 0.2s;
+    }
+    .entity-tile:hover {
+      transform: translateY(-2px);
+      box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+    }
+  </style>
+</head>
+<body class="bg-gray-900 min-h-screen">
+  <!-- Branding Header -->
+  <div class="bg-black/30 py-4 border-b border-white/10">
+    <div class="max-w-7xl mx-auto px-4 flex items-center justify-between">
+      <div class="flex items-center space-x-3">
+        <img src="ai4privacy-logo.png" alt="Logo" class="h-8 w-8">
+        <div>
+          <span class="text-xl font-bold text-white">Ai4Privacy</span>
+          <span class="block text-xs text-white/60">Short Text Anonymization Locally in Your Browser</span>
+        </div>
+      </div>
+      <!-- Settings Button -->
+      <button id="settingsButton" class="text-white/60 hover:text-white transition-colors">
+        <span class="iconify" data-icon="mdi:cog" data-width="24"></span>
+      </button>
+    </div>
+  </div>
+  <!-- Settings Panel -->
+  <div id="settingsPanel" class="hidden absolute right-4 top-20 bg-gray-800 border border-white/10 rounded-xl p-4 w-64 space-y-4 z-50">
+    <div>
+      <label class="block text-sm text-white/80 mb-2">Detection Threshold</label>
+      <input type="number" id="thresholdInput" step="0.001" min="0" max="1" value="0.01"
+             class="w-full bg-gray-700 border border-white/10 rounded-lg px-3 py-2 text-white">
+    </div>
+    <div>
+      <label class="block text-sm text-white/80 mb-2">Language Model</label>
+      <select id="modelSelect" class="w-full bg-gray-700 border border-white/10 rounded-lg px-3 py-2 text-white">
+        <option value="english">English - ai4privacy/llama-ai4privacy-english-anonymiser-openpii</option>
+      </select>
+    </div>
+  </div>
+  <div class="max-w-7xl mx-auto px-4 py-8">
+    <div class="flex flex-col lg:flex-row gap-8">
+      <!-- Input/Output Section -->
+      <div class="flex-1 space-y-6">
+        <div>
+          <label class="block text-sm font-medium text-white/80 mb-2">Input Text</label>
+          <textarea
+            id="inputText"
+            class="w-full p-4 bg-gray-800 border border-white/10 rounded-xl text-white placeholder-white/30 focus:border-blue-500 focus:ring-2 focus:ring-blue-500/30 resize-none"
+            rows="6"
+            placeholder="Enter sensitive text to anonymize..."
+          ></textarea>
+        </div>
+        <div>
+          <label class="block text-sm font-medium text-white/80 mb-2">Anonymized Output</label>
+          <textarea
+            id="outputText"
+            class="w-full p-4 bg-gray-800 border border-white/10 rounded-xl text-white/80 resize-none"
+            rows="6"
+            readonly
+          ></textarea>
+        </div>
+      </div>
+      <!-- Privacy Mask Panel -->
+      <div class="lg:w-96">
+        <div class="sticky top-8">
+            <label class="block text-sm font-medium text-white/80 mb-2">Detected Entities</label>
+          <div class="bg-gray-800 border border-white/10 rounded-xl p-4">
+            <div class="mb-4">
+              <span id="processingStatus" class="text-xs text-white/40">Ready</span>
+            </div>
+            <div
+              id="privacyMask"
+              class="h-96 bg-gray-850 rounded-lg p-3 overflow-y-auto text-sm space-y-2"
+            >
+              <div class="text-center text-white/40 py-4">Processing results will appear here</div>
+            </div>
+          </div>
+        </div>
+      </div>
+    </div>
+  </div>
+  <!-- Branding Footer -->
+  <div class="fixed bottom-0 left-0 right-0 bg-black/30 border-t border-white/10 py-3">
+    <div class="max-w-7xl mx-auto px-4">
+      <div class="flex items-center justify-between">
+        <div class="text-sm text-white/50">© 2025 Ai4Privacy. All rights reserved.</div>
+        <div class="flex items-center space-x-4">
+          <span class="text-sm text-white/50">v2.1.0</span>
+          <div class="w-px h-4 bg-white/10"></div>
+          <img src="ai4privacy-logo.png" alt="Logo" class="h-6 w-6 opacity-70">
+        </div>
+      </div>
+    </div>
+  </div>
+  <script type="module">
+    import { AutoModel, AutoTokenizer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.4.0';
+    // Initialize variables
+    let tokenizer, model;
+    let isModelLoaded = false;
+    let currentInput = "";
+    // DOM Elements
+    const inputText = document.getElementById('inputText');
+    const outputText = document.getElementById('outputText');
+    const statusElement = document.getElementById('processingStatus');
+    // Add debounce to input handler
+    let timeout;
+    inputText.addEventListener('input', (event) => {
+      currentInput = event.target.value;
+      statusElement.textContent = 'Processing...';
+      clearTimeout(timeout);
+      timeout = setTimeout(updateOutput, 300);
+    });
+    async function loadModel() {
+      try {
+        tokenizer = await AutoTokenizer.from_pretrained('ai4privacy/llama-ai4privacy-english-anonymiser-openpii');
+        model = await AutoModel.from_pretrained('ai4privacy/llama-ai4privacy-english-anonymiser-openpii', { dtype: "q8" });
+        isModelLoaded = true;
+        statusElement.textContent = 'Model loaded';
+        updateOutput();
+      } catch (err) {
+        console.error("Error loading model:", err);
+        statusElement.textContent = 'Error loading model';
+        outputText.value = "Error loading model.";
+      }
+    }
+    async function updateOutput() {
+      if (!isModelLoaded) {
+        statusElement.textContent = 'Loading model...';
+        outputText.value = "";
+        return;
+      }
+      try {
+        const processed = await processText(currentInput, tokenizer, model);
+        statusElement.textContent = `Processed ${currentInput.length} characters`;
+        outputText.value = processed.maskedText;
+        const privacyMaskDiv = document.getElementById('privacyMask');
+        privacyMaskDiv.innerHTML = '';
+        if (processed.replacements.length > 0) {
+          processed.replacements.forEach(replacement => {
+            const tile = document.createElement('div');
+            tile.className = 'entity-tile bg-gray-800 p-3 rounded-lg border border-white/10 hover:border-white/20';
+            tile.innerHTML = `
+              <div class="text-xs text-white/60 mb-1">${replacement.placeholder}</div>
+              <div class="text-sm text-white font-medium">${replacement.original}</div>
+              <div class="text-xs text-white/40 mt-1">Sensitive Information</div>
+              <div class="text-xs text-white/40 mt-1">Activation: ${Math.round(replacement.activation * 100)}%</div>
+            `;
+            privacyMaskDiv.appendChild(tile);
+          });
+        } else {
+          const emptyState = document.createElement('div');
+          emptyState.className = 'text-center text-white/40 py-4';
+          emptyState.textContent = 'No sensitive information detected.';
+          privacyMaskDiv.appendChild(emptyState);
+        }
+      } catch (err) {
+        statusElement.textContent = 'Error processing text';
+        console.error("Error processing text:", err);
+        outputText.value = "Error processing text.";
+      }
+    }
+    async function processText(text, tokenizer, model) {
+      const inputs = await tokenizer(text);
+      const inputTokens = inputs.input_ids.data;
+      const tokenStrings = Array.from(inputTokens).map(id =>
+        tokenizer.decode([id], { skip_special_tokens: false })
+      );
+      const { logits } = await model(inputs);
+      const logitsData = Array.from(logits.data);
+      const numTokens = tokenStrings.length;
+      const numClasses = 3;
+      const logitsPerToken = [];
+      for (let i = 0; i < numTokens; i++) {
+        logitsPerToken.push(logitsData.slice(i * numClasses, (i + 1) * numClasses));
+      }
+      function softmax(logits) {
+        const expLogits = logits.map(Math.exp);
+        const sumExp = expLogits.reduce((a, b) => a + b, 0);
+        return expLogits.map(exp => exp / sumExp);
+      }
+      const tokenPredictions = tokenStrings.map((token, i) => {
+        const probs = softmax(logitsPerToken[i]);
+        const maxSensitive = Math.max(probs[0], probs[1]);
+        return {
+          token: token,
+          start: i,
+          end: i + 1,
+          probabilities: {
+            "B-PRIVATE": probs[0],
+            "I-PRIVATE": probs[1],
+            "O": probs[2]
+          },
+          maxSensitiveScore: maxSensitive
+        };
+      });
+      const aggregated = aggregatePrivacyTokens(tokenPredictions);
+      const { maskedText, replacements } = maskText(tokenPredictions, aggregated);
+      return { maskedText, replacements };
+    }
+    function aggregatePrivacyTokens(tokenPredictions) {
+      const threshold = parseFloat(document.getElementById('thresholdInput').value) || 0.01;
+      const aggregated = [];
+      let i = 0;
+      const n = tokenPredictions.length;
+      while (i < n) {
+        const currentToken = tokenPredictions[i];
+        if (['[CLS]', '[SEP]'].includes(currentToken.token)) {
+          i++;
+          continue;
+        }
+        const startsWithSpace = currentToken.token.startsWith(' ');
+        const isFirstWord = aggregated.length === 0 && i === 0;
+        if (startsWithSpace || isFirstWord) {
+          const group = {
+            tokens: [currentToken],
+            indices: [i],
+            scores: [currentToken.maxSensitiveScore],
+            startsWithSpace: startsWithSpace
+          };
+          i++;
+          while (i < n &&
+                !tokenPredictions[i].token.startsWith(' ') &&
+                !['[CLS]', '[SEP]'].includes(tokenPredictions[i].token)) {
+            group.tokens.push(tokenPredictions[i]);
+            group.indices.push(i);
+            group.scores.push(tokenPredictions[i].maxSensitiveScore);
+            i++;
+          }
+          if (Math.max(...group.scores) >= threshold) {
+            aggregated.push(group);
+          }
+        } else {
+          i++;
+        }
+      }
+      return aggregated;
+    }
+    function maskText(tokenPredictions, aggregatedGroups) {
+      const maskedTokens = [];
+      const replacements = [];
+      const maskedIndices = new Set();
+      let redactedCounter = 1;
+      aggregatedGroups.forEach(group => {
+        group.indices.forEach(idx => maskedIndices.add(idx));
+      });
+      tokenPredictions.forEach((token, idx) => {
+        if (['[CLS]', '[SEP]'].includes(token.token)) return;
+        if (maskedIndices.has(idx)) {
+          const group = aggregatedGroups.find(g => g.indices[0] === idx);
+          if (group) {
+            const originalTokens = group.tokens.map(t => t.token);
+            const originalText = originalTokens
+              .map((token, i) => (i === 0 && group.startsWithSpace ? token.trimStart() : token))
+              .join('');
+            const placeholder = `[PII_${redactedCounter}]`;
+            replacements.push({
+              original: originalText,
+              placeholder: placeholder,
+              activation: Math.max(...group.scores) // Add activation score
+            });
+            redactedCounter++;
+            const maskWithSpace = group.startsWithSpace ? ` ${placeholder}` : placeholder;
+            maskedTokens.push(maskWithSpace);
+          }
+        } else {
+          maskedTokens.push(token.token);
+        }
+      });
+      return { maskedText: maskedTokens.join('').replace(/\s+/g, ' ').trim(), replacements };
+    }
+    // Load model when page loads
+    loadModel();
+    // Add settings toggle functionality
+    const settingsButton = document.getElementById('settingsButton');
+    const settingsPanel = document.getElementById('settingsPanel');
+    let settingsVisible = false;
+    settingsButton.addEventListener('click', (e) => {
+      settingsVisible = !settingsVisible;
+      settingsPanel.classList.toggle('hidden', !settingsVisible);
+      e.stopPropagation();
+    });
+    document.addEventListener('click', (e) => {
+      if (settingsVisible && !settingsPanel.contains(e.target)) {
+        settingsPanel.classList.add('hidden');
+        settingsVisible = false;
+      }
+    });
+  </script>
+</body>
+</html>