# animalese_RVC
#
# Sleeping
#
# File size: 2,147 Bytes
#
# b3dc4d7
# c0560f8

import re
import numpy as np
import wave


class Animalese:
    """Animalese-style speech synthesizer driven by a library of letter sounds.

    The library file is loaded as raw unsigned bytes. ``synthesize`` skips the
    first 44 bytes (presumably the canonical WAV header of the library file)
    and then expects 26 consecutive clips, one per letter A-Z, each 0.15 s
    long at 44.1 kHz.
    """

    # Number of leading bytes skipped before the first letter clip.
    _HEADER_BYTES = 44
    # Sample value written for every character that is not a letter A-Z.
    _SILENCE = 127

    def __init__(self, letters_file, onload=None):
        """Load the letter library from *letters_file*.

        Args:
            letters_file: Path of the library file; read in binary mode.
            onload: Optional zero-argument callable invoked once the library
                has been read.
        """
        with open(letters_file, 'rb') as f:
            self.letter_library = np.frombuffer(f.read(), dtype=np.uint8)
        if onload is not None:
            onload()

    def synthesize(self, script, shorten=False, pitch=1.0,
                   output_file="output.wav"):
        """Render *script* to a WAV file and return the file's path.

        Each character produces 0.075 s of audio: letters A-Z (case
        insensitive) are resampled from their library clip, every other
        character becomes silence.

        Args:
            script: Text to speak.
            shorten: When true, non-letters are dropped and each word is
                reduced to its first and last letter before synthesis.
            pitch: Read-step multiplier through the library clip; larger
                values play a letter faster and higher.
            output_file: Destination path. Note that the default is a fixed
                name, so successive calls overwrite each other.

        Returns:
            The path the audio was written to.
        """
        if shorten:
            # Replace all non-alphabet characters with spaces, split into
            # words, and keep only the first and last letter of each word.
            words = re.sub(r'[^a-zA-Z]', ' ', script).split()
            processed_script = "".join(
                w[0] + w[-1] if len(w) > 1 else w for w in words
            )
        else:
            processed_script = script

        sample_freq = 44100
        library_letter_secs = 0.15
        library_samples_per_letter = int(library_letter_secs * sample_freq)
        output_letter_secs = 0.075
        output_samples_per_letter = int(output_letter_secs * sample_freq)

        # The read offsets inside a letter clip are identical for every
        # letter, so compute them once. Clamping keeps the read inside the
        # letter's own clip: without it, pitch > 2.0 would bleed into the next
        # letter's audio and run off the end of the library for late letters.
        offsets = np.clip(
            np.arange(output_samples_per_letter) * pitch,
            0,
            library_samples_per_letter - 1,
        ).astype(np.intp)
        silence = np.full(output_samples_per_letter, self._SILENCE,
                          dtype=np.uint8)

        chunks = []
        for c in processed_script.upper():
            if 'A' <= c <= 'Z':
                letter_start = (
                    self._HEADER_BYTES
                    + library_samples_per_letter * (ord(c) - ord('A'))
                )
                chunks.append(self.letter_library[letter_start + offsets])
            else:
                chunks.append(silence)

        if chunks:
            data = np.concatenate(chunks).astype(np.uint8, copy=False)
        else:
            data = np.empty(0, dtype=np.uint8)

        # Create the .wav file data
        return self.create_wave(data, sample_freq, output_file)

    def create_wave(self, data, sample_rate, output_file="output.wav"):
        """Write *data* as a mono, 8-bit WAV file and return its path.

        Args:
            data: NumPy array of samples; its raw bytes are written as frames.
            sample_rate: Frame rate stored in the WAV header.
            output_file: Destination path.

        Returns:
            ``output_file``.
        """
        with wave.open(output_file, "wb") as f:
            f.setnchannels(1)
            f.setsampwidth(1)
            f.setframerate(sample_rate)
            f.writeframes(data.tobytes())
        return output_file

# Build the shared module-level synthesizer; the callback prints "Loaded"
# once the letter library has been read.
synth = Animalese(
    letters_file='animalese.wav',
    onload=lambda: print("Loaded"),
)

def generate_audio(text, shorten, pitch):
    """Synthesize *text* with the module-level ``synth`` and return its result.

    Args:
        text: Script handed to ``synth.synthesize``.
        shorten: Forwarded as the ``shorten`` option.
        pitch: Forwarded as the ``pitch`` option.
    """
    output_path = synth.synthesize(text, shorten=shorten, pitch=pitch)
    return output_path

def preview_audio(audio_file):
    """Return the complete contents of *audio_file* as bytes."""
    with open(audio_file, 'rb') as stream:
        payload = stream.read()
    return payload