<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Train Model from PDFs</title>
  <style>
    nav {
      margin-bottom: 2em;
    }

    .nav-link {
      margin: 5px;
      padding: 5px;
      border: 1px solid green;
    }
  </style>
  <!--
    Library versions are pinned on purpose. An unpinned URL follows the latest
    release, and the classic (non-module) PDF.js build used here is tied to the
    3.x line. These scripts are intentionally NOT deferred: the inline script at
    the end of the body relies on the globals `tf` and `pdfjsLib`.
  -->
  <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@4.22.0/dist/tf.min.js"></script>
  <script src="https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/build/pdf.min.js"></script>
</head>
<body>
  <!-- Navigation links live in the body: anchors are not valid inside <head>. -->
  <nav aria-label="Pages">
    <a class="nav-link" href="entrenament-pdf.html" lang="ca">Entrenament PDF</a>
    <a class="nav-link" href="preguntar-pdf.html" lang="ca">Preguntar PDF</a>
  </nav>

  <main>
    <h1>Train Model from PDFs</h1>

    <label for="fileInput">PDF files</label>
    <input type="file" id="fileInput" accept="application/pdf,.pdf" multiple>
    <button type="button" id="trainModel">Train Model</button>

    <!-- role="status" makes progress messages announced by assistive technology. -->
    <pre id="status" role="status"></pre>
  </main>

  <script>
    /** Number of word-length features taken from each PDF. */
    const FEATURE_COUNT = 10;

    /** Worker script matching the pinned PDF.js version loaded in <head>. */
    const PDF_WORKER_SRC =
      'https://cdn.jsdelivr.net/npm/pdfjs-dist@3.11.174/build/pdf.worker.min.js';

    const statusEl = document.getElementById('status');
    const trainButton = document.getElementById('trainModel');
    const fileInput = document.getElementById('fileInput');

    /**
     * Shows a progress or error message in the status area.
     * @param {string} message Text to display.
     */
    function setStatus(message) {
      statusEl.textContent = message;
    }

    /**
     * Extracts the text of every page of a PDF file.
     *
     * Each page's text items are joined with single spaces, and a trailing
     * space is appended after every page.
     *
     * @param {File} file PDF file chosen by the user.
     * @returns {Promise<string>} Concatenated text of all pages.
     */
    async function extractTextFromPDF(file) {
      // Set lazily (not at script load) so that a failed CDN load surfaces as a
      // click-time error instead of preventing the click handler from registering.
      pdfjsLib.GlobalWorkerOptions.workerSrc = PDF_WORKER_SRC;

      const loadingTask = pdfjsLib.getDocument({ data: await file.arrayBuffer() });
      try {
        const pdf = await loadingTask.promise;
        let text = '';
        for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
          const page = await pdf.getPage(pageNumber);
          const content = await page.getTextContent();
          text += content.items.map(item => item.str).join(' ') + ' ';
        }
        return text;
      } finally {
        // Release the document and its worker-side resources.
        await loadingTask.destroy();
      }
    }

    /**
     * Converts text into a fixed-length numeric feature vector.
     *
     * Example featurization: the lengths of the first `size` words. Empty
     * tokens produced by leading/trailing whitespace are dropped, and short
     * texts are right-padded with zeros so every vector has the same length
     * (a 2-D tensor cannot be built from ragged rows).
     *
     * @param {string} text Source text.
     * @param {number} [size=FEATURE_COUNT] Length of the returned vector.
     * @returns {number[]} Exactly `size` numbers.
     */
    function textToFeatures(text, size = FEATURE_COUNT) {
      const lengths = text
        .split(/\s+/)
        .filter(Boolean)
        .slice(0, size)
        .map(word => word.length);
      while (lengths.length < size) {
        lengths.push(0);
      }
      return lengths;
    }

    /**
     * Trains a small dense binary classifier and stores it in IndexedDB under
     * 'indexeddb://pdf-trained-model'.
     *
     * Errors raised while saving are reported in the status area and logged;
     * errors raised while building or fitting the model propagate to the caller.
     *
     * @param {{input: number[], label: number}[]} data Training samples; every
     *   `input` must have the same length.
     * @returns {Promise<void>}
     */
    async function trainModel(data) {
      if (!data.length) {
        throw new Error('No training data available.');
      }

      // The feature count comes from the sample's input vector, not from the
      // sample object itself (which has no `length`).
      const featureCount = data[0].input.length;

      const model = tf.sequential();
      model.add(tf.layers.dense({ units: 128, activation: 'relu', inputShape: [featureCount] }));
      model.add(tf.layers.dense({ units: 64, activation: 'relu' }));
      model.add(tf.layers.dense({ units: 1, activation: 'sigmoid' }));
      model.compile({ optimizer: 'adam', loss: 'binaryCrossentropy', metrics: ['accuracy'] });

      const inputs = tf.tensor2d(data.map(sample => sample.input), [data.length, featureCount]);
      // Shape [n, 1] so the targets match the single-unit output layer.
      const labels = tf.tensor2d(data.map(sample => [sample.label]), [data.length, 1]);

      try {
        setStatus('Training the model...');
        await model.fit(inputs, labels, {
          epochs: 10,
          callbacks: {
            onEpochEnd: (epoch, logs) => {
              // `epoch` is 0-based; show it 1-based in both places.
              console.log(`Epoch ${epoch + 1}: loss = ${logs.loss}`);
              setStatus(`Epoch ${epoch + 1}: Loss = ${logs.loss}`);
            }
          }
        });

        setStatus('Saving the model to IndexedDB...');
        try {
          await model.save('indexeddb://pdf-trained-model');
          setStatus('Model saved successfully in IndexedDB!');
        } catch (err) {
          setStatus('Error saving the model: ' + err.message);
          console.error('Error saving the model:', err);
        }
      } finally {
        // Tensors and model weights are not garbage-collected; free them explicitly.
        inputs.dispose();
        labels.dispose();
        model.dispose();
      }
    }

    trainButton.addEventListener('click', async () => {
      const files = fileInput.files;
      if (!files.length) {
        setStatus('Please select PDF files to train the model.');
        return;
      }

      // Prevent overlapping training runs while one is in progress.
      trainButton.disabled = true;
      try {
        const data = [];
        setStatus('Extracting text from PDFs...');
        for (const file of files) {
          const text = await extractTextFromPDF(file);
          data.push({
            input: textToFeatures(text), // Example: word lengths as features
            label: 1 // Example label (adjust as needed for your use case)
          });
        }
        await trainModel(data);
      } catch (err) {
        setStatus('Error: ' + err.message);
        console.error('Error while training from PDFs:', err);
      } finally {
        trainButton.disabled = false;
      }
    });
  </script>
</body>
</html>