czd358121692 committed on
Commit
2cc6f4b
1 Parent(s): d5f2ccb

prompt updates

Browse files
Files changed (1) hide show
  1. app.py +7 -6
app.py CHANGED
@@ -87,7 +87,7 @@ def get_caption(image_in):
87
  if image.mode != "RGB":
88
  image = image.convert("RGB")
89
 
90
- prompt = "<grounding>Describe this image in detail without names:"
91
  inputs = kosmos_processor(text=prompt, images=image, return_tensors="pt")
92
 
93
  device = next(kosmos_model.parameters()).device
@@ -106,13 +106,14 @@ def get_caption(image_in):
106
  processed_text, _ = kosmos_processor.post_process_generation(generated_text)
107
 
108
  # Clean up output
109
- for prefix in ["Describe this image in detail without names", "An image of", "<grounding>"]:
110
  processed_text = processed_text.replace(prefix, "").strip()
111
 
112
  return processed_text
113
 
114
  except Exception as e:
115
- raise gr.Error(f"Image caption generation failed: {str(e)}")
 
116
 
117
  # Continuing from previous code...
118
 
@@ -123,16 +124,16 @@ def get_musical_prompt(user_prompt):
123
  try:
124
  check_disk_space()
125
  standard_sys = """
126
- You are a musician AI who specializes in translating architectural spaces into musical experiences. Your job is to create concise musical descriptions that capture the essence of architectural photographs.
127
-
128
  Consider these elements in your composition:
129
  - Spatial Experience: expansive/intimate spaces, layered forms, acoustical qualities
130
  - Materials & Textures: metallic, glass, concrete translated into instrumental textures
131
  - Musical Elements: blend of classical structure and jazz improvisation
132
  - Orchestration: symphonic layers, solo instruments, or ensemble variations
133
  - Soundscapes: environmental depth and spatial audio qualities
 
 
134
 
135
- Respond immediately with a single musical prompt. No explanation, just the musical description.
136
  Examples:
137
  Input: "A curved titanium facade reflecting sunlight with flowing organic forms"
138
  Output: "Fluid jazz piano with shimmering orchestral textures, metallic percussion accents, and expansive reverb creating architectural depth"
 
87
  if image.mode != "RGB":
88
  image = image.convert("RGB")
89
 
90
+ prompt = "<grounding>Describe the visual elements in detail with rich adjectives and without names:"
91
  inputs = kosmos_processor(text=prompt, images=image, return_tensors="pt")
92
 
93
  device = next(kosmos_model.parameters()).device
 
106
  processed_text, _ = kosmos_processor.post_process_generation(generated_text)
107
 
108
  # Clean up output
109
+ for prefix in ["Describe the visual elements in detail with rich adjectives and without names", "An image of", "<grounding>"]:
110
  processed_text = processed_text.replace(prefix, "").strip()
111
 
112
  return processed_text
113
 
114
  except Exception as e:
115
+ # raise gr.Error(f"Image caption generation failed: {str(e)}")
116
+ return "A curved titanium facade reflecting sunlight with flowing organic forms" # fallback sample
117
 
118
  # Continuing from previous code...
119
 
 
124
  try:
125
  check_disk_space()
126
  standard_sys = """
127
+ You are a musician AI who specializes in translating architectural space visual descriptions into musical prompts. Your job is to create concise musical prompts that capture the essence of it.
 
128
  Consider these elements in your composition:
129
  - Spatial Experience: expansive/intimate spaces, layered forms, acoustical qualities
130
  - Materials & Textures: metallic, glass, concrete translated into instrumental textures
131
  - Musical Elements: blend of classical structure and jazz improvisation
132
  - Orchestration: symphonic layers, solo instruments, or ensemble variations
133
  - Soundscapes: environmental depth and spatial audio qualities
134
+ Do not mention Gehry, Disney, Bilbao directly. Be poetic, creative, melodic, harmonious, rhythmic.
135
+ Respond immediately with a single musical prompt. No explanation, just the musical description.
136
 
 
137
  Examples:
138
  Input: "A curved titanium facade reflecting sunlight with flowing organic forms"
139
  Output: "Fluid jazz piano with shimmering orchestral textures, metallic percussion accents, and expansive reverb creating architectural depth"