YifeiXin committed on
Commit
d944d42
1 Parent(s): b9c58e1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -5
README.md CHANGED
@@ -103,10 +103,10 @@ def inference(args):
103
  audio_video_tensor = preprocess(audio_video_path)
104
  else:
105
  audio_video_tensor = preprocess(audio_video_path, va=True if args.modal_type == "av" else False)
106
- question = f"Please describe the video with visual and audio information."
107
 
108
  # Audio Inference
109
- audio_video_path = "assets/Traffic and pedestrians.wav"
110
  preprocess = processor['audio' if args.modal_type == "a" else "video"]
111
  if args.modal_type == "a":
112
  audio_video_tensor = preprocess(audio_video_path)
@@ -115,13 +115,13 @@ def inference(args):
115
  question = f"Please describe the audio."
116
 
117
  # Video Inference
118
- audio_video_path = "assets/WBS4I.mp4"
119
  preprocess = processor['audio' if args.modal_type == "a" else "video"]
120
  if args.modal_type == "a":
121
  audio_video_tensor = preprocess(audio_video_path)
122
  else:
123
  audio_video_tensor = preprocess(audio_video_path, va=True if args.modal_type == "av" else False)
124
- question = f"Please describe the video."
125
 
126
  output = mm_infer(
127
  audio_video_tensor,
@@ -138,11 +138,12 @@ def inference(args):
138
  if __name__ == "__main__":
139
  parser = argparse.ArgumentParser()
140
 
141
- parser.add_argument('--model-path', help='', required=True)
142
  parser.add_argument('--modal-type', choices=["a", "v", "av"], help='', required=True)
143
  args = parser.parse_args()
144
 
145
  inference(args)
 
146
  ```
147
 
148
 
 
103
  audio_video_tensor = preprocess(audio_video_path)
104
  else:
105
  audio_video_tensor = preprocess(audio_video_path, va=True if args.modal_type == "av" else False)
106
+ question = f"Please describe the video with audio information."
107
 
108
  # Audio Inference
109
+ audio_video_path = "assets/bird-twitter-car.wav"
110
  preprocess = processor['audio' if args.modal_type == "a" else "video"]
111
  if args.modal_type == "a":
112
  audio_video_tensor = preprocess(audio_video_path)
 
115
  question = f"Please describe the audio."
116
 
117
  # Video Inference
118
+ audio_video_path = "assets/output_v_1jgsRbGzCls.mp4"
119
  preprocess = processor['audio' if args.modal_type == "a" else "video"]
120
  if args.modal_type == "a":
121
  audio_video_tensor = preprocess(audio_video_path)
122
  else:
123
  audio_video_tensor = preprocess(audio_video_path, va=True if args.modal_type == "av" else False)
124
+ question = f"What activity are the people practicing in the video?"
125
 
126
  output = mm_infer(
127
  audio_video_tensor,
 
138
  if __name__ == "__main__":
139
  parser = argparse.ArgumentParser()
140
 
141
+ parser.add_argument('--model-path', help='', required=False, default='DAMO-NLP-SG/VideoLLaMA2.1-7B-AV')
142
  parser.add_argument('--modal-type', choices=["a", "v", "av"], help='', required=True)
143
  args = parser.parse_args()
144
 
145
  inference(args)
146
+
147
  ```
148
 
149