File size: 1,476 Bytes
5548515
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

######## Build Experiment Environment ###########
exp_dir=$(cd `dirname $0`; pwd)
work_dir=$(dirname $(dirname $(dirname $exp_dir)))

export WORK_DIR=$work_dir
export PYTHONPATH=$work_dir
export PYTHONIOENCODING=UTF-8

######## Set Experiment Configuration ###########
exp_config="$exp_dir/exp_config_v2.json"
exp_name="audioldm_debug_latent_size_4_10_78"
checkpoint_path="$work_dir/ckpts/tta/audioldm_debug_latent_size_4_10_78/checkpoints/step-0325000_loss-0.1936.pt"
output_dir="$work_dir/temp"
vocoder_config_path="$work_dir/ckpts/tta/hifigan_checkpoints/config.json"
vocoder_path="$work_dir/ckpts/tta/hifigan_checkpoints/g_01250000"
num_steps=200
guidance_scale=4.0

export CUDA_VISIBLE_DEVICES="0" 

######## Parse Command Line Arguments ###########
while [[ $# -gt 0 ]]
do
key="$1"

case $key in
    --text)
    text="$2"
    shift # past argument
    shift # past value
    ;;
    *)    # unknown option
    shift # past argument
    ;;
esac
done

######## Run inference ###########
python "${work_dir}"/bins/tta/inference.py \
    --config=$exp_config \
    --checkpoint_path=$checkpoint_path \
    --text="A man is whistling" \
    --vocoder_path=$vocoder_path \
    --vocoder_config_path=$vocoder_config_path \
    --num_steps=$num_steps \
    --guidance_scale=$guidance_scale \
    --output_dir=$output_dir \