Spaces:
Running
Running
File size: 3,997 Bytes
30ff09c eb7343a 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 30ff09c 9849e16 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Train Model from PDFs</title>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
<!-- pdf.js is pinned to a 3.x release: newer pdfjs-dist versions ship only ES-module builds (.mjs), so the unversioned build/pdf.min.js URL no longer resolves. -->
<script src="https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/build/pdf.min.js"></script>
</head>
<body>
<a href="entrenament-pdf.html" style="margin:5px;padding: 5px; border:1px solid green">Entrenament PDF</a>
<a href="preguntar-pdf.html" style="margin:5px;padding: 5px; border:1px solid green">Preguntar PDF</a>
<br><br>
<h1>Train Model from PDFs</h1>
<input type="file" id="fileInput" multiple>
<button id="trainModel">Train Model</button>
<pre id="status"></pre>
<script>
/**
 * Extracts the text of every page of a PDF using pdf.js.
 *
 * @param {File|Blob} file - Object exposing `arrayBuffer()`, e.g. a File
 *   taken from an `<input type="file">`.
 * @returns {Promise<string>} The text of all pages. Text items within a page
 *   are joined by single spaces and every page is followed by one space.
 */
async function extractTextFromPDF(file) {
  const pdf = await pdfjsLib.getDocument(await file.arrayBuffer()).promise;
  try {
    const pageTexts = [];
    // Pages are requested by 1-based page number, in order.
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const content = await page.getTextContent();
      pageTexts.push(`${content.items.map((item) => item.str).join(' ')} `);
    }
    return pageTexts.join('');
  } finally {
    // Release what pdf.js holds for this document, even when a page fails to
    // load; otherwise every processed file stays loaded.
    await pdf.destroy();
  }
}
/**
 * Builds a small dense binary classifier (128 -> 64 -> 1 sigmoid), trains it
 * for 10 epochs on the given samples and saves it to IndexedDB under
 * `indexeddb://pdf-trained-model`. Progress and errors are written to the
 * `#status` element.
 *
 * @param {Array<{input: number[], label: number}>} data - Training samples.
 *   Every `input` must have the same length as the first sample's `input`.
 * @returns {Promise<void>} Resolves once training and the save attempt finish.
 *   A failed save is reported in `#status` and logged, not rethrown.
 */
async function trainModel(data) {
  const statusEl = document.getElementById('status');

  if (!Array.isArray(data) || data.length === 0) {
    statusEl.textContent = 'No training data available.';
    return;
  }

  const model = tf.sequential();
  model.add(tf.layers.dense({
    units: 128,
    activation: 'relu',
    // Each sample is an object { input, label }; the feature count is the
    // length of its `input` array, not of the sample object itself.
    inputShape: [data[0].input.length],
  }));
  model.add(tf.layers.dense({ units: 64, activation: 'relu' }));
  model.add(tf.layers.dense({ units: 1, activation: 'sigmoid' }));
  model.compile({
    optimizer: 'adam',
    loss: 'binaryCrossentropy',
    metrics: ['accuracy'],
  });

  const inputs = tf.tensor2d(data.map((sample) => sample.input));
  const labels = tf.tensor1d(data.map((sample) => sample.label));

  try {
    statusEl.textContent = 'Training the model...';
    await model.fit(inputs, labels, {
      epochs: 10,
      callbacks: {
        onEpochEnd: (epoch, logs) => {
          // `epoch` is 0-based; report it 1-based in both places.
          const epochNumber = epoch + 1;
          console.log(`Epoch ${epochNumber}: loss = ${logs.loss}`);
          statusEl.textContent = `Epoch ${epochNumber}: Loss = ${logs.loss}`;
        },
      },
    });

    statusEl.textContent = 'Saving the model to IndexedDB...';
    try {
      await model.save('indexeddb://pdf-trained-model');
      statusEl.textContent = 'Model saved successfully in IndexedDB!';
    } catch (err) {
      statusEl.textContent = 'Error saving the model: ' + err.message;
      console.error('Error saving the model:', err);
    }
  } finally {
    // Tensors and model weights are not garbage-collected by TensorFlow.js;
    // free them explicitly so repeated training runs do not accumulate memory.
    inputs.dispose();
    labels.dispose();
    model.dispose();
  }
}
// Click handler for the "Train Model" button: extracts the text of every
// selected PDF, turns each document into a fixed-width feature row and passes
// the rows to trainModel(). Progress and failures are shown in #status.
document.getElementById('trainModel').addEventListener('click', async () => {
  // Number of word-length features taken from each document.
  const FEATURE_COUNT = 10;
  const statusEl = document.getElementById('status');
  const trainButton = document.getElementById('trainModel');
  const files = document.getElementById('fileInput').files;

  if (!files.length) {
    statusEl.textContent = 'Please select PDF files to train the model.';
    return;
  }

  // Block re-entry: a second click while a run is in progress would start an
  // overlapping extraction/training run writing to the same status element.
  trainButton.disabled = true;
  try {
    const data = [];
    statusEl.textContent = 'Extracting text from PDFs...';
    for (const file of files) {
      const text = await extractTextFromPDF(file);
      // Example: using word lengths as features. Empty strings produced by
      // split() for leading/trailing whitespace are dropped so they do not
      // show up as zero-length "words".
      const wordLengths = text
        .split(/\s+/)
        .filter((word) => word.length > 0)
        .map((word) => word.length);
      // Use the first FEATURE_COUNT tokens as input features.
      const input = wordLengths.slice(0, FEATURE_COUNT);
      // Zero-pad short documents: trainModel() builds one 2-D tensor from all
      // rows, which requires every row to have the same length.
      while (input.length < FEATURE_COUNT) {
        input.push(0);
      }
      data.push({
        input,
        label: 1, // Example label (adjust as needed for your use case)
      });
    }
    statusEl.textContent = 'Training the model...';
    await trainModel(data);
  } catch (err) {
    // Surface the failure instead of leaving a stale progress message.
    statusEl.textContent = `Error while training from PDFs: ${err.message}`;
    console.error('Error while training from PDFs:', err);
  } finally {
    trainButton.disabled = false;
  }
});
</script>
</body>
</html>
|