File size: 7,065 Bytes
13240dd
 
 
 
fcd8eda
 
 
13240dd
 
 
 
 
 
 
fcd8eda
 
 
 
 
 
 
 
 
 
 
 
13240dd
fcd8eda
13240dd
 
 
 
 
 
 
 
fcd8eda
13240dd
 
 
 
 
 
 
fcd8eda
13240dd
fcd8eda
 
13240dd
 
 
 
 
 
fcd8eda
13240dd
fcd8eda
 
 
13240dd
 
 
 
 
 
fcd8eda
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13240dd
fcd8eda
 
 
 
 
 
 
 
 
 
 
 
 
 
13240dd
41c5739
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13240dd
 
 
 
 
 
 
 
 
 
 
 
 
 
fcd8eda
13240dd
 
 
 
fcd8eda
 
13240dd
fcd8eda
13240dd
 
fcd8eda
13240dd
 
fcd8eda
 
13240dd
fcd8eda
13240dd
fcd8eda
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
import os
from PIL import Image
import google.generativeai as genai
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import tempfile

# Gemini client setup: the API key comes from the environment, and a single
# module-level model instance is created for the functions below to use.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
MODEL_ID = "gemini-1.5-pro-latest"
genai.configure(api_key=GOOGLE_API_KEY)
model = genai.GenerativeModel(MODEL_ID)

# Prompt templates that are prepended to the user-supplied content.
analysis_system_prompt = (
    "You are an expert in gender studies. "
    "Analyze the following content for any signs of gender-based discrimination "
    "and suggest actionable advice."
)
podcast_prompt = "\n".join(
    (
        "You are Eva, a solo podcast host focusing on gender equality topics.",
        "- Discuss real-life scenarios involving gender-based discrimination, provide insights, and offer solutions in a conversational, storytelling style.",
        "- Based on the analyzed text, create an engaging solo podcast as if reading stories from different victims who send you their story.",
        "- Introduce yourself as Eva.",
        "- Keep the conversation within 30000 characters, with a lot of emotion.",
        "- Use short sentences suitable for speech synthesis.",
        "- Maintain an empathetic tone.",
        "- Include filler words like 'äh' for a natural flow.",
        "- Avoid background music or extra words.",
        "",  # empty last item so the prompt ends with a newline
    )
)

# Sampling and output-length settings passed to every generate_content call
# in this file.
generation_config = genai.GenerationConfig(
    candidate_count=1,
    max_output_tokens=8192,
    temperature=0.9,
    top_k=32,
    top_p=1.0,
)

# Safety filters for the analysis requests: every listed harm category uses
# the same BLOCK_LOW_AND_ABOVE threshold.
safety_settings = dict.fromkeys(
    (
        genai.types.HarmCategory.HARM_CATEGORY_HARASSMENT,
        genai.types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        genai.types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        genai.types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    ),
    genai.types.HarmBlockThreshold.BLOCK_LOW_AND_ABOVE,
)

# Analyze text
def analyze_text(text):
    """Ask the model to analyze ``text`` for gender-based discrimination.

    Args:
        text: Free-form text entered by the user.

    Returns:
        The model's analysis as a string. If ``text`` is empty or only
        whitespace, a short message asking for input is returned without
        calling the model. If the model yields no usable text (for example
        because the response was blocked), ``"No response generated."`` is
        returned.
    """
    if not text or not text.strip():
        # Nothing to analyze, so skip the API round-trip entirely.
        return "Please enter some text to analyze."

    prompt = f"{analysis_system_prompt}\nContent:\n{text}"
    response = model.generate_content(
        [prompt],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    try:
        # ``response.text`` raises ValueError when the response carries no
        # valid text part (e.g. the candidate was blocked by safety filters).
        return response.text or "No response generated."
    except ValueError:
        return "No response generated."

# Analyze image
def analyze_image(image: "Image.Image | None") -> str:
    """Ask the model to analyze an image for gender-based discrimination.

    Args:
        image: The uploaded picture, or ``None`` when nothing was uploaded.

    Returns:
        The model's analysis as a string. If ``image`` is ``None``, a short
        message asking for an upload is returned without calling the model.
        If the model yields no usable text (for example because the response
        was blocked), ``"No response generated."`` is returned.
    """
    if image is None:
        # The button can be clicked before any image has been uploaded.
        return "Please upload an image to analyze."

    prompt = f"{analysis_system_prompt}\nAnalyze this image for any instances of gender-based discrimination."
    resized_image = preprocess_image(image)
    response = model.generate_content(
        [prompt, resized_image],
        generation_config=generation_config,
        safety_settings=safety_settings,
    )
    try:
        # ``response.text`` raises ValueError when the response carries no
        # valid text part (e.g. the candidate was blocked by safety filters).
        return response.text or "No response generated."
    except ValueError:
        return "No response generated."

# Preprocess image by resizing
def preprocess_image(image: "Image.Image", target_width: int = 512) -> "Image.Image":
    """Return a copy of ``image`` scaled to ``target_width``, keeping aspect ratio.

    The previous implementation computed the resized image but then returned
    a fixed description string, so callers never received the picture.

    Args:
        image: Source image exposing ``width``, ``height`` and ``resize``.
        target_width: Width of the returned image in pixels (default 512).

    Returns:
        The resized image produced by ``image.resize``.
    """
    # Clamp to 1 so extremely wide images do not truncate to a zero height.
    target_height = max(1, int(image.height * target_width / image.width))
    return image.resize((target_width, target_height))

# Generate podcast script
def generate_podcast_script(content):
    """Turn analyzed content into a solo-podcast script for the host "Eva".

    Args:
        content: Analysis text to base the podcast on.

    Returns:
        The generated script as a string. If ``content`` is empty or the
        model yields no usable text (for example because the response was
        blocked), ``"Eva has no commentary at this time."`` is returned.
    """
    fallback = "Eva has no commentary at this time."
    if not content or not content.strip():
        # No analysis to talk about, so skip the API round-trip.
        return fallback

    prompt = f"{podcast_prompt}\nAnalyzed content:\n{content}"
    response = model.generate_content([prompt], generation_config=generation_config)
    try:
        # ``response.text`` raises ValueError when the response carries no
        # valid text part (e.g. the candidate was blocked).
        return response.text or fallback
    except ValueError:
        return fallback

# Convert script to audio using gTTS
def text_to_speech(script):
    """Synthesize ``script`` to a single MP3 file and return its path.

    The script is split on ``"."`` into sentence-sized pieces, each piece is
    synthesized with gTTS, and a 0.5-second pause is appended after each one.
    Pieces that fail to synthesize are reported on stdout and skipped, so the
    result is best-effort.

    Args:
        script: Text to speak.

    Returns:
        Path of a temporary ``.mp3`` file holding the combined audio. The
        file is not deleted by this function; the caller owns it.
    """
    # Split by sentences for manageable TTS segments.
    sentences = [part.strip() for part in script.split(".") if part.strip()]
    pause = AudioSegment.silent(duration=500)  # 0.5-second pause after each sentence
    combined_audio = AudioSegment.empty()

    for sentence in sentences:
        # Only the path is needed: close the handle right away so it is not
        # leaked and the file can be reopened by name on every platform.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as segment_file:
            segment_path = segment_file.name
        try:
            tts = gTTS(text=sentence, lang='en', tld='com')  # Using 'com' for American accent
            tts.save(segment_path)
            # Keep the decoded segment in memory instead of re-encoding it to
            # MP3 and reading it back, which cost time and audio quality.
            combined_audio += AudioSegment.from_mp3(segment_path) + pause
        except Exception as e:
            # Best-effort: one failed sentence must not abort the whole podcast.
            print(f"Error generating audio for line '{sentence}': {e}")
        finally:
            # Always remove the per-sentence file, including on failure.
            try:
                os.remove(segment_path)
            except OSError:
                pass

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as output_file:
        output_path = output_file.name
    # ``export`` returns the file object it wrote to; close it so the handle
    # is not left open.
    combined_audio.export(output_path, format="mp3").close()
    return output_path

# Generate and play podcast
def generate_and_play_podcast(content, content_type='text'):
    """Generate a podcast script from ``content`` and synthesize it to audio.

    Args:
        content: Analysis text to base the podcast on.
        content_type: Currently unused; kept so existing callers keep working.

    Returns:
        Whatever ``text_to_speech`` returns for the generated script.
    """
    script = generate_podcast_script(content)
    return text_to_speech(script)


# Custom stylesheet for the Gradio app. This must live at module level: it was
# previously indented inside the function above, after its ``return``, so it
# was never executed and the name was undefined where the UI is built.
css_style = """

   body, .gradio-container {
        background-color: #020308; /* Replace with your preferred color */
    }
    
    #logo {
        display: flex;
        justify-content: center;
        font-size: 3em;
        font-weight: bold;
        letter-spacing: 3px;
    }
    .letter {
        opacity: 0;
        animation: fadeIn 0.1s forwards;
    }
.letter.j { animation-delay: 0s; color: #4285F4; }  /* Blue */
.letter.u { animation-delay: 0.1s; color: #3A9CF1; }
.letter.s { animation-delay: 0.2s; color: #32B3EE; }
.letter.t { animation-delay: 0.3s; color: #2BC9EA; }
.letter.e { animation-delay: 0.4s; color: #23E0E7; }
.letter.v { animation-delay: 0.5s; color: #1BF7E4; }
.letter.a { animation-delay: 0.6s; color: #14F0B5; }  /* Greenish */

@keyframes fadeIn {
    0% { opacity: 0; transform: translateY(-20px); }
    100% { opacity: 1; transform: translateY(0); }
}
 """

# Gradio interface setup
# Components are laid out in the order they are created inside the Blocks
# context, so the statement order below defines the page layout.
with gr.Blocks(css=css_style) as app:
    # Logo: one <span> per letter so the CSS classes in css_style can style
    # and animate each letter individually.
    gr.HTML("""
        <div id="logo">
            <span class="letter j">J</span>
            <span class="letter u">u</span>
            <span class="letter s">s</span>
            <span class="letter t">t</span>
            <span class="letter e">E</span>
            <span class="letter v">v</span>
            <span class="letter a">a</span>
        </div>
    """)
    gr.Markdown("<h1 style='text-align: center; color:#f0f0f0;'>Promotes Gender Equality in Every Conversation</h1>")

    # Tab 1: analyze free-form text, then optionally turn the analysis into audio.
    with gr.Tab("Text Analysis"):
        text_input = gr.Textbox(label="Enter Text or Select an Example", placeholder="Type here or select an example...", lines=4)
        text_output = gr.Textbox(label="Analysis Output", lines=6)
        analyze_text_btn = gr.Button("Analyze Text")
        listen_podcast_btn = gr.Button("Listen to Eva")
        
        # "Analyze Text" sends the input box to analyze_text and shows the result.
        analyze_text_btn.click(analyze_text, inputs=text_input, outputs=text_output)
        # "Listen to Eva" feeds the analysis output (not the raw input) to the
        # podcast generator; the audio goes to a gr.Audio created inline here.
        listen_podcast_btn.click(generate_and_play_podcast, inputs=text_output, outputs=gr.Audio())

    # Tab 2: same flow as above, starting from an uploaded image.
    with gr.Tab("Image Analysis"):
        # type="pil" is requested so that analyze_image gets a PIL image,
        # matching its type annotation.
        image_input = gr.Image(label="Upload Image (e.g., screenshot, photos, etc.)", type="pil")
        image_output = gr.Textbox(label="Analysis Output", lines=6)
        analyze_image_btn = gr.Button("Analyze Image")
        listen_podcast_image_btn = gr.Button("Listen to Eva")

        # "Analyze Image" sends the upload to analyze_image and shows the result.
        analyze_image_btn.click(analyze_image, inputs=image_input, outputs=image_output)
        # The podcast is generated from the image analysis text.
        listen_podcast_image_btn.click(generate_and_play_podcast, inputs=image_output, outputs=gr.Audio())

# Start the app when this module is executed.
app.launch()