File size: 3,997 Bytes
30ff09c
 
 
 
 
 
eb7343a
 
 
30ff09c
 
 
 
9849e16
 
30ff09c
9849e16
30ff09c
 
 
 
9849e16
30ff09c
 
9849e16
 
30ff09c
9849e16
30ff09c
 
9849e16
 
30ff09c
9849e16
 
 
 
 
30ff09c
9849e16
 
30ff09c
9849e16
 
 
 
 
30ff09c
9849e16
 
30ff09c
9849e16
30ff09c
 
 
 
 
9849e16
 
30ff09c
 
 
 
9849e16
 
 
 
 
 
 
 
 
30ff09c
 
 
9849e16
 
 
30ff09c
 
 
9849e16
 
30ff09c
9849e16
 
 
 
 
 
 
 
 
 
 
 
30ff09c
 
 
 
9849e16
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Train Model from PDFs</title>
    <a href="entrenament-pdf.html" style="margin:5px;padding: 5px; border:1px solid green">Entrenament PDF</a>
    <a href="preguntar-pdf.html" style="margin:5px;padding: 5px; border:1px solid green">Preguntar PDF</a>
    <br><br>
    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
    <script src="https://cdn.jsdelivr.net/npm/pdfjs-dist/build/pdf.min.js"></script>
</head>
<body>
    <h1>Train Model from PDFs</h1>
    <input type="file" id="fileInput" multiple>
    <button id="trainModel">Train Model</button>
    <pre id="status"></pre>

    <script>
        /**
         * Extracts the plain text of every page of a PDF file.
         *
         * Relies on the global `pdfjsLib` provided by the pdf.js script tag in the page head.
         *
         * @param {File} file - PDF file (any object exposing an async `arrayBuffer()` works).
         * @returns {Promise<string>} The text items of each page joined by single spaces,
         *     with one extra space appended after every page.
         */
        async function extractTextFromPDF(file) {
            const loadingTask = pdfjsLib.getDocument({ data: await file.arrayBuffer() });
            try {
                const pdf = await loadingTask.promise;
                let text = '';
                for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
                    const page = await pdf.getPage(pageNumber);
                    const content = await page.getTextContent();
                    text += `${content.items.map(item => item.str).join(' ')} `;
                }
                return text;
            } finally {
                // Destroy the loading task even when parsing fails, so the document
                // and its worker do not pile up while several PDFs are processed.
                await loadingTask.destroy();
            }
        }

        /**
         * Builds a small dense binary classifier, trains it on the given samples and
         * saves it to IndexedDB under `indexeddb://pdf-trained-model`.
         *
         * Progress and errors are reported through the `#status` element.
         * Relies on the global `tf` provided by the TensorFlow.js script tag in the page head.
         *
         * @param {Array<{input: number[], label: number}>} data - Training samples; every
         *     `input` must have the same length as the first sample's `input`.
         * @returns {Promise<void>} Resolves once training and the save attempt have finished.
         *     A failed save is reported in the status element and logged, not thrown.
         */
        async function trainModel(data) {
            const statusEl = document.getElementById('status');

            if (!Array.isArray(data) || data.length === 0) {
                statusEl.textContent = 'No training data available.';
                return;
            }

            // Feature width comes from the sample's `input` array; `data[0]` itself is
            // an object and has no `length`.
            const featureCount = data[0].input.length;

            const model = tf.sequential();

            model.add(tf.layers.dense({
                units: 128,
                activation: 'relu',
                inputShape: [featureCount]
            }));

            model.add(tf.layers.dense({ units: 64, activation: 'relu' }));
            model.add(tf.layers.dense({ units: 1, activation: 'sigmoid' }));

            model.compile({
                optimizer: 'adam',
                loss: 'binaryCrossentropy',
                metrics: ['accuracy']
            });

            const inputs = tf.tensor2d(data.map(d => d.input));
            const labels = tf.tensor1d(data.map(d => d.label));

            try {
                statusEl.textContent = 'Training the model...';

                await model.fit(inputs, labels, {
                    epochs: 10,
                    callbacks: {
                        onEpochEnd: (epoch, logs) => {
                            console.log(`Epoch ${epoch}: loss = ${logs.loss}`);
                            statusEl.textContent = `Epoch ${epoch + 1}: Loss = ${logs.loss}`;
                        }
                    }
                });

                statusEl.textContent = 'Saving the model to IndexedDB...';

                try {
                    await model.save('indexeddb://pdf-trained-model');
                    statusEl.textContent = 'Model saved successfully in IndexedDB!';
                } catch (err) {
                    statusEl.textContent = 'Error saving the model: ' + err.message;
                    console.error('Error saving the model:', err);
                }
            } finally {
                // Tensors and model weights are not garbage-collected by the JS engine;
                // free them explicitly so repeated training runs do not leak memory.
                inputs.dispose();
                labels.dispose();
                model.dispose();
            }
        }

        // Click handler for the "Train Model" button: extracts the text of every
        // selected PDF, turns each document into a fixed-size feature vector and
        // trains the model on the result. Progress and failures are shown in #status.
        document.getElementById('trainModel').addEventListener('click', async () => {
            // Number of features per document; every sample must have exactly this
            // many values so the samples form a rectangular 2-D tensor.
            const FEATURE_COUNT = 10;

            const statusEl = document.getElementById('status');
            const files = document.getElementById('fileInput').files;
            if (!files.length) {
                statusEl.textContent = 'Please select PDF files to train the model.';
                return;
            }

            try {
                const data = [];
                statusEl.textContent = 'Extracting text from PDFs...';

                for (const file of files) {
                    const text = await extractTextFromPDF(file);

                    // Example features: lengths of the first words. Empty strings
                    // produced by leading/trailing whitespace are not words, so drop them.
                    const features = text
                        .split(/\s+/)
                        .filter(word => word.length > 0)
                        .slice(0, FEATURE_COUNT)
                        .map(word => word.length);

                    // Zero-pad short documents so every sample has the same width.
                    while (features.length < FEATURE_COUNT) {
                        features.push(0);
                    }

                    data.push({
                        input: features,
                        label: 1 // Example label (adjust as needed for your use case)
                    });
                }

                statusEl.textContent = 'Training the model...';
                await trainModel(data);
            } catch (err) {
                // Without this, a bad PDF or a training failure would be an unhandled
                // rejection and the status would stay stuck on the last progress message.
                statusEl.textContent = `Error training the model: ${err.message}`;
                console.error('Error training the model:', err);
            }
        });
    </script>
</body>
</html>