knowledge-scribe

Sleeping

dwb2023 committed 29 days ago

Commit

4c90570

•

1 Parent(s): 793d15e

yadda yadda... getting rid of flash attention for now

Files changed (1) hide show

app.py CHANGED Viewed

@@ -25,40 +25,19 @@ MODEL_NAME = "openai/whisper-large-v3-turbo"
 BATCH_SIZE = 8  # Optimized for better GPU utilization
 YT_LENGTH_LIMIT_S = 10800  # 3 hours
 DATASET_NAME = "dwb2023/yt-transcripts-v3"
 # Environment setup
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
-# Model setup
-model = AutoModelForSpeechSeq2Seq.from_pretrained(
-    MODEL_NAME,
-    use_cache=False,
-    device_map="auto",
-    low_cpu_mem_usage=True,
-    attn_implementation="flash_attention_2",
-    torch_dtype=torch.bfloat16
-)
-# Flash Attention setup for memory and speed optimization if supported
-try:
-    from flash_attn import flash_attn_fn
-    model.config.use_flash_attention = True
-except ImportError:
-    print("Flash Attention is not available. Proceeding without it.")
-# Note: torch.compile is not compatible with Flash Attention or the chunked long-form algorithm.
-# Processor setup
-tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_NAME)
 # Pipeline setup
 pipe = pipeline(
     task="automatic-speech-recognition",
-    model=model,
-    tokenizer=tokenizer,
-    feature_extractor=feature_extractor,
-    chunk_length_s=30,  # 30 seconds
 )
 def reset_and_update_dataset(new_data):

 BATCH_SIZE = 8  # Optimized for better GPU utilization
 YT_LENGTH_LIMIT_S = 10800  # 3 hours
 DATASET_NAME = "dwb2023/yt-transcripts-v3"
+FILE_LIMIT_MB = 1000
 # Environment setup
 os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
+device = 0 if torch.cuda.is_available() else "cpu"
 # Pipeline setup
 pipe = pipeline(
     task="automatic-speech-recognition",
+    model=MODEL_NAME,
+    chunk_length_s=30,
+    device=device,
 )
 def reset_and_update_dataset(new_data):