Error while trying to run using transformers.js

#5
by hitchhiker3010 - opened

Hi,

I'm new to JS and transformers.js.
I was trying to run this model in the browser using the following code:

<!DOCTYPE html>
<html>
<head>
  <title>Test Transformers.js</title>
  <script type="module">
    async function testSummarization() {
      try {
        // Load transformers.js from the CDN (only env and pipeline are used here)
        const { env, pipeline } = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.1');
        console.log('Transformers.js loaded'); // Debugging statement
        env.allowLocalModels = false;
        // Load the text-generation pipeline
        const summarizationPipeline = await pipeline('text-generation', 'HuggingFaceTB/SmolLM2-135M-Instruct', {
          dtype: 'q4f16', use_external_data_format: true,
        });
        console.log('Summarization pipeline loaded'); // Debugging statement

        // Run the generation
        const text = 'Hi my name is SmolLm2';
        console.log(text);

        const result = await summarizationPipeline(text, { max_length: 13, min_length: 3, length_penalty: 2.0, num_beams: 1 });
        console.log('Generation result:', result); // Debugging statement

        // Note: text-generation pipelines expose generated_text, not summary_text
        console.log(result[0].generated_text);
      } catch (error) {
        console.error('Error:', error);
      }
    }

    testSummarization();
  </script>
</head>
<body>
  <h1>Test Transformers.js</h1>
</body>
</html>

This is the console log I'm getting, which I'm having a hard time understanding:

test.html:10 Transformers.js loaded
test.html:17 Summarization pipeline loaded
test.html:21 Hi my name is SmolLm2
transformers@3.0.1:175 An error occurred during model execution: "283134224".
A @ transformers@3.0.1:175
await in A
D @ transformers@3.0.1:175
forward @ transformers@3.0.1:175
generate @ transformers@3.0.1:175
_call @ transformers@3.0.1:187
e @ transformers@3.0.1:214
testSummarization @ test.html:23
await in testSummarization
(anonymous) @ test.html:32
transformers@3.0.1:175 Inputs given to model: {input_ids: Proxy(o), attention_mask: Proxy(o), position_ids: Proxy(o), past_key_values.0.key: Proxy(o), past_key_values.0.value: Proxy(o), …}
A @ transformers@3.0.1:175
await in A
D @ transformers@3.0.1:175
forward @ transformers@3.0.1:175
generate @ transformers@3.0.1:175
_call @ transformers@3.0.1:187
e @ transformers@3.0.1:214
testSummarization @ test.html:23
await in testSummarization
(anonymous) @ test.html:32
test.html:28 Error: 283134224
testSummarization @ test.html:28
await in testSummarization
(anonymous) @ test.html:32

Any help would be much appreciated, thanks in advance. 🤗

Hugging Face TB Research org

Hi there! We're aware of the issue (CPU implementation of f16 [Skip][Simplified]LayerNormalization; see here), and this will be fixed in v3.1 (coming soon).

In the meantime, you should be able to fix it by either:

  1. Using WebGPU:
const summarizationPipeline = await pipeline('text-generation', 'HuggingFaceTB/SmolLM2-135M-Instruct', {
  dtype: 'q4f16', device: 'webgpu',
});
  2. Using a non-fp16 model:
const summarizationPipeline = await pipeline('text-generation', 'HuggingFaceTB/SmolLM2-135M-Instruct', {
  dtype: 'q4',
});
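A quick note on choosing between the two: WebGPU isn't enabled in every browser yet, so a page can feature-detect it via navigator.gpu and fall back to the non-fp16 weights otherwise. A minimal sketch of that pattern (the fallback policy here is an assumption for illustration, not something from the thread):

// Sketch: prefer the WebGPU build when the browser exposes the API,
// otherwise fall back to the q4 (non-fp16) weights on the default WASM backend.
// This fallback policy is an assumption for illustration.
const options = navigator.gpu
  ? { dtype: 'q4f16', device: 'webgpu' }
  : { dtype: 'q4' };
const summarizationPipeline = await pipeline('text-generation', 'HuggingFaceTB/SmolLM2-135M-Instruct', options);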

Thanks for the reply! I'm able to get the output now.

hitchhiker3010 changed discussion status to closed

Hi,

I was trying to apply the chat template and generate using the code below.

<!DOCTYPE html>
<html>
<head>
  <title>Test Transformers.js</title>
  <script type="module">
    async function testSummarization() {
      try {
        // Load transformers.js from the CDN
        const { env, AutoTokenizer, AutoModelForCausalLM } = await import('https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.2');
        console.log('Transformers.js loaded'); // Debugging statement
        env.allowLocalModels = true;
        // env.allowRemoteModels = true;
        env.useBrowserCache = true;
        let model_name = 'HuggingFaceTB/SmolLM2-135M-Instruct';

        let tokenizer = await AutoTokenizer.from_pretrained(model_name);
        console.log('Tokenizer loaded'); // Debugging statement

        let model = await AutoModelForCausalLM.from_pretrained(model_name, {
          dtype: 'q4f16', device: 'webgpu'
        });
        console.log('Model loaded'); // Debugging statement

        const chat = [{ "role": "user", "content": "What is the capital of France." }];

        console.log("chat", chat);
        let input_text = await tokenizer.apply_chat_template(chat, { tokenize: true, return_tensor: true, add_generation_prompt: true });
        console.log("input_text", input_text);
        console.log("input_text.dims", input_text.dims);
        let outputs = await model.generate(input_text); // , { max_new_tokens: 130, max_length: 130, min_length: 3, length_penalty: 2.0, num_beams: 1 });
        console.log(outputs);
        let decoded = tokenizer.decode(outputs[0], { skip_special_tokens: true });
        console.log(decoded);
        
      } catch (error) {
        console.error('Error:', error);
      }
    }

    testSummarization();
  </script>
</head>
<body>
  <h1>Test Transformers.js</h1>
</body>
</html>

I'm running into the following error, which I'm unable to debug: [Error: TypeError: Cannot read properties of null (reading 'dims')] (stack trace attached below). I can see that there is a dims property on the input_text variable.

Transformers.js loaded
test_messages.html:17 Tokenizer loaded
transformers@3.0.2:100 2024-11-21 16:46:08.285399 [W:onnxruntime:, session_state.cc:1168 VerifyEachNodeIsAssignedToAnEp] Some nodes were not assigned to the preferred execution providers which may or may not have an negative impact on performance. e.g. ORT explicitly assigns shape related ops to CPU to improve perf.
or @ transformers@3.0.2:100
$func11798 @ ort-wasm-simd-threaded.jsep.wasm:0x103b449
$func1946 @ ort-wasm-simd-threaded.jsep.wasm:0x229810
$func836 @ ort-wasm-simd-threaded.jsep.wasm:0xc49fe
$func11721 @ ort-wasm-simd-threaded.jsep.wasm:0x1033705
$func630 @ ort-wasm-simd-threaded.jsep.wasm:0x8fad9
$func88 @ ort-wasm-simd-threaded.jsep.wasm:0xd635
$func14399 @ ort-wasm-simd-threaded.jsep.wasm:0x13b8e71
$func123 @ ort-wasm-simd-threaded.jsep.wasm:0x15847
$func2101 @ ort-wasm-simd-threaded.jsep.wasm:0x25f185
$func11258 @ ort-wasm-simd-threaded.jsep.wasm:0xfed8f4
$La @ ort-wasm-simd-threaded.jsep.wasm:0xaf6beb
t.<computed> @ transformers@3.0.2:100
p._OrtCreateSession @ transformers@3.0.2:100
(anonymous) @ transformers@3.0.2:100
ed @ transformers@3.0.2:100
bd @ transformers@3.0.2:100
loadModel @ transformers@3.0.2:100
createInferenceSessionHandler @ transformers@3.0.2:100
create @ transformers@3.0.2:100
await in create
g @ transformers@3.0.2:151
(anonymous) @ transformers@3.0.2:175
await in (anonymous)
E @ transformers@3.0.2:175
from_pretrained @ transformers@3.0.2:175
await in from_pretrained
from_pretrained @ transformers@3.0.2:175
await in from_pretrained
testSummarization @ test_messages.html:19
await in testSummarization
(anonymous) @ test_messages.html:44
transformers@3.0.2:100 2024-11-21 16:46:08.286500 [W:onnxruntime:, session_state.cc:1170 VerifyEachNodeIsAssignedToAnEp] Rerunning with verbose output on a non-minimal build will show node assignments.
or @ transformers@3.0.2:100
$func11798 @ ort-wasm-simd-threaded.jsep.wasm:0x103b449
$func1946 @ ort-wasm-simd-threaded.jsep.wasm:0x229810
$func836 @ ort-wasm-simd-threaded.jsep.wasm:0xc49fe
$func11721 @ ort-wasm-simd-threaded.jsep.wasm:0x1033705
$func630 @ ort-wasm-simd-threaded.jsep.wasm:0x8fad9
$func88 @ ort-wasm-simd-threaded.jsep.wasm:0xd635
$func14399 @ ort-wasm-simd-threaded.jsep.wasm:0x13b8e71
$func123 @ ort-wasm-simd-threaded.jsep.wasm:0x15847
$func2101 @ ort-wasm-simd-threaded.jsep.wasm:0x25f66a
$func11258 @ ort-wasm-simd-threaded.jsep.wasm:0xfed8f4
$La @ ort-wasm-simd-threaded.jsep.wasm:0xaf6beb
t.<computed> @ transformers@3.0.2:100
p._OrtCreateSession @ transformers@3.0.2:100
(anonymous) @ transformers@3.0.2:100
ed @ transformers@3.0.2:100
bd @ transformers@3.0.2:100
loadModel @ transformers@3.0.2:100
createInferenceSessionHandler @ transformers@3.0.2:100
create @ transformers@3.0.2:100
await in create
g @ transformers@3.0.2:151
(anonymous) @ transformers@3.0.2:175
await in (anonymous)
E @ transformers@3.0.2:175
from_pretrained @ transformers@3.0.2:175
await in from_pretrained
from_pretrained @ transformers@3.0.2:175
await in from_pretrained
testSummarization @ test_messages.html:19
await in testSummarization
(anonymous) @ test_messages.html:44
test_messages.html:24 Model loaded
test_messages.html:28 chat [{ role: 'user', content: 'What is the capital of France.' }]
test_messages.html:30 input_text Proxy(o) {ort_tensor: o}
test_messages.html:31 input_text.dims (2) [1, 37]
test_messages.html:40 Error: TypeError: Cannot read properties of null (reading 'dims')
    at Function.generate (transformers@3.0.2:175:29723)
    at testSummarization (test_messages.html:34:35)
testSummarization @ test_messages.html:40
await in testSummarization
(anonymous) @ test_messages.html:44

Please help.

hitchhiker3010 changed discussion status to open
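For anyone hitting the same TypeError: the examples in the transformers.js documentation pass generate an object of tensors rather than a bare input_ids tensor, typically by requesting a dict from apply_chat_template. A sketch of that pattern (untested against this exact setup, so treat it as a starting point rather than a confirmed fix):

// Ask the tokenizer for a dict ({ input_ids, attention_mask })
// instead of a single tensor.
const inputs = tokenizer.apply_chat_template(chat, {
  add_generation_prompt: true,
  return_dict: true,
});
// Spread the tensors into generate along with any generation options.
const outputs = await model.generate({ ...inputs, max_new_tokens: 128 });
console.log(tokenizer.batch_decode(outputs, { skip_special_tokens: true })[0]);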
