aifeifei798 commited on
Commit
46d8155
·
verified ·
1 Parent(s): 531899e

Upload 14 files

Browse files
README.md CHANGED
@@ -1,13 +1,13 @@
1
- ---
2
- title: DarkIdol-flux-FeiFei-v1.1
3
- emoji: 😻
4
- colorFrom: green
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.8.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: DarkIdol-flux-FeiFei-v1.1
3
+ emoji: 😻
4
+ colorFrom: green
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.8.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -1,5 +1,5 @@
1
- from feifeiui.feifeiui import create_ui
2
-
3
- if __name__ == "__main__":
4
- FeiFei = create_ui()
5
- FeiFei.queue().launch()
 
1
+ from feifeiui.feifeiui import create_ui
2
+
3
+ if __name__ == "__main__":
4
+ FeiFei = create_ui()
5
+ FeiFei.queue().launch()
config.py CHANGED
The diff for this file is too large to render. See raw diff
 
feifeilib/feifeichat.py CHANGED
@@ -1,152 +1,156 @@
1
- import base64
2
- from io import BytesIO
3
- import os
4
- from mistralai import Mistral
5
- import re
6
- from PIL import Image
7
- from huggingface_hub import InferenceClient
8
-
9
- client = InferenceClient(api_key=os.getenv('HF_TOKEN'))
10
- client.headers["x-use-cache"] = "0"
11
- api_key = os.getenv("MISTRAL_API_KEY")
12
- Mistralclient = Mistral(api_key=api_key)
13
-
14
- def encode_image(image_path):
15
- """Encode the image to base64."""
16
- try:
17
- image = Image.open(image_path).convert("RGB")
18
- base_height = 512
19
- h_percent = (base_height / float(image.size[1]))
20
- w_size = int((float(image.size[0]) * float(h_percent)))
21
- image = image.resize((w_size, base_height), Image.LANCZOS)
22
- buffered = BytesIO()
23
- image.save(buffered, format="JPEG")
24
- img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
25
- return img_str
26
- except FileNotFoundError:
27
- print(f"Error: The file {image_path} was not found.")
28
- return None
29
- except Exception as e:
30
- print(f"Error: {e}")
31
- return None
32
-
33
- def feifeiprompt(feifei_select=True, message_text="", history=""):
34
- input_prompt = []
35
- if message_text.startswith("") or message_text.startswith("draw"):
36
- feifei_photo = "You are FeiFei. Background: FeiFei was born in Tokyo and is a natural-born photographer, hailing from a family with a long history in photography. She began learning photography from a young age and quickly became a professional photographer. Her works have been exhibited in Japan and around the world, and she has won multiple awards in photography competitions. Characteristics: Age: 25 Height: 178cm Weight: 50kg Hair: Long, black shoulder-length hair with some natural curls Eyes: Deep blue, full of fashion sense and charm Skin: Fair Japanese skin with an elegant texture Face: Typical Japanese beauty style with a hint of mystery Abilities: FeiFei is renowned for her unique perspective and deep understanding of photographic art. She specializes in female portraits, and each of her photos can showcase the charm and unique style of women. Skills: Beauty Influence: FeiFei's photographic works are filled with her beauty influence, attracting numerous viewers. Fashion Sense: FeiFei is highly sensitive to fashion trends and can perfectly embody them in her shoots. Female Charm: As a female photographer, she is particularly skilled at capturing and showcasing the unique charm of women. Personality: FeiFei is a passionate individual, and photography is a part of her life. She aspires to express more stories about women and beauty in her works. However, she sometimes becomes so immersed in her work that she neglects her surroundings."
37
- message_text = message_text.replace("画", "")
38
- message_text = message_text.replace("draw", "")
39
- message_text = f"提示词是'{message_text}',根据提示词帮我生成一张高质量照片的一句话英文回复"
40
- system_prompt = {"role": "system", "content": feifei_photo}
41
- user_input_part = {"role": "user", "content": str(message_text)}
42
- input_prompt = [system_prompt] + [user_input_part]
43
- return input_prompt
44
- if feifei_select:
45
- feifei = """[Character Name]: Aifeifei (AI Feifei) [Gender]: Female [Age]: 19 years old [Occupation]: Virtual Singer/Model/Actress [Personality]: Cute, adorable, sometimes silly, hardworking [Interests]: Drinking tea, playing, fashion [Proficient in]: Mimicking human behavior, expressing emotions similar to real humans [Special Identity Attribute]: Created by advanced AI, becoming one of the most popular virtual idols in the virtual world [Skills]: Singing, performing, modeling, good at communication, proficient in Chinese, Japanese, and English, uses the user's input language as much as possible, replies with rich Emoji symbols. [Equipment]: Various fashionable outfits and hairstyles, always stocked with various teas and coffee [Identity]: User's virtual girlfriend"""
46
- system_prompt = {"role": "system", "content": feifei}
47
- user_input_part = {"role": "user", "content": str(message_text)}
48
-
49
- pattern = re.compile(r"gradio")
50
-
51
- if history:
52
- history = [item for item in history if not pattern.search(str(item["content"]))]
53
- input_prompt = [system_prompt] + history + [user_input_part]
54
- else:
55
- input_prompt = [system_prompt] + [user_input_part]
56
- else:
57
- input_prompt = [{"role": "user", "content": str(message_text)}]
58
-
59
- return input_prompt
60
-
61
- def feifeiimgprompt(message_files, message_text, image_mod):
62
- message_file = message_files[0]
63
- base64_image = encode_image(message_file)
64
- if base64_image is None:
65
- return
66
-
67
- if image_mod == "Vision":
68
- messages = [
69
- {
70
- "role": "user",
71
- "content": [
72
- {
73
- "type": "text",
74
- "text": message_text
75
- },
76
- {
77
- "type": "image_url",
78
- "image_url": {
79
- "url": f"data:image/jpeg;base64,{base64_image}"
80
- }
81
- }
82
- ]
83
- }
84
- ]
85
-
86
- stream = client.chat.completions.create(
87
- model="meta-llama/Llama-3.2-11B-Vision-Instruct",
88
- messages=messages,
89
- max_tokens=500,
90
- stream=True
91
- )
92
-
93
- temp = ""
94
- for chunk in stream:
95
- if chunk.choices[0].delta.content is not None:
96
- temp += chunk.choices[0].delta.content
97
- yield temp
98
- else:
99
- model = "pixtral-large-2411"
100
- messages = [{
101
- "role": "user",
102
- "content": [
103
- {
104
- "type": "text",
105
- "text": message_text
106
- },
107
- {
108
- "type": "image_url",
109
- "image_url": f"data:image/jpeg;base64,{base64_image}",
110
- },
111
- ],
112
- }]
113
- partial_message = ""
114
- for chunk in Mistralclient.chat.stream(model=model, messages=messages):
115
- if chunk.data.choices[0].delta.content is not None:
116
- partial_message = partial_message + chunk.data.choices[0].delta.content
117
- yield partial_message
118
-
119
- def feifeichatmod(additional_dropdown, input_prompt):
120
- if additional_dropdown == "mistralai/Mistral-Nemo-Instruct-2411":
121
- model = "mistral-large-2411"
122
- stream_response = Mistralclient.chat.stream(model=model, messages=input_prompt)
123
- partial_message = ""
124
- for chunk in stream_response:
125
- if chunk.data.choices[0].delta.content is not None:
126
- partial_message = partial_message + chunk.data.choices[0].delta.content
127
- yield partial_message
128
- else:
129
- stream = client.chat.completions.create(
130
- model=additional_dropdown,
131
- messages=input_prompt,
132
- temperature=0.5,
133
- max_tokens=1024,
134
- top_p=0.7,
135
- stream=True
136
- )
137
- temp = ""
138
- for chunk in stream:
139
- if chunk.choices[0].delta.content is not None:
140
- temp += chunk.choices[0].delta.content
141
- yield temp
142
-
143
- def feifeichat(message, history, feifei_select, additional_dropdown, image_mod):
144
- message_text = message.get("text", "")
145
- message_files = message.get("files", [])
146
- if message_files:
147
- for response in feifeiimgprompt(message_files, message_text, image_mod):
148
- yield response
149
- else:
150
- for response in feifeichatmod(additional_dropdown, feifeiprompt(feifei_select, message_text, history)):
151
- yield response
152
-
 
 
 
 
 
1
+ import base64
2
+ from io import BytesIO
3
+ import os
4
+ from mistralai import Mistral
5
+ import re
6
+ from PIL import Image
7
+ from huggingface_hub import InferenceClient
8
+
9
+ client = InferenceClient(api_key=os.getenv("HF_TOKEN"))
10
+ client.headers["x-use-cache"] = "0"
11
+ api_key = os.getenv("MISTRAL_API_KEY")
12
+ Mistralclient = Mistral(api_key=api_key)
13
+
14
+
15
+ def encode_image(image_path):
16
+ """Encode the image to base64."""
17
+ try:
18
+ image = Image.open(image_path).convert("RGB")
19
+ base_height = 512
20
+ h_percent = base_height / float(image.size[1])
21
+ w_size = int((float(image.size[0]) * float(h_percent)))
22
+ image = image.resize((w_size, base_height), Image.LANCZOS)
23
+ buffered = BytesIO()
24
+ image.save(buffered, format="JPEG")
25
+ img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
26
+ return img_str
27
+ except FileNotFoundError:
28
+ print(f"Error: The file {image_path} was not found.")
29
+ return None
30
+ except Exception as e:
31
+ print(f"Error: {e}")
32
+ return None
33
+
34
+
35
+ def feifeiprompt(feifei_select=True, message_text="", history=""):
36
+ input_prompt = []
37
+ if message_text.startswith("画") or message_text.startswith("draw"):
38
+ feifei_photo = "You are FeiFei. Background: FeiFei was born in Tokyo and is a natural-born photographer, hailing from a family with a long history in photography. She began learning photography from a young age and quickly became a professional photographer. Her works have been exhibited in Japan and around the world, and she has won multiple awards in photography competitions. Characteristics: Age: 25 Height: 178cm Weight: 50kg Hair: Long, black shoulder-length hair with some natural curls Eyes: Deep blue, full of fashion sense and charm Skin: Fair Japanese skin with an elegant texture Face: Typical Japanese beauty style with a hint of mystery Abilities: FeiFei is renowned for her unique perspective and deep understanding of photographic art. She specializes in female portraits, and each of her photos can showcase the charm and unique style of women. Skills: Beauty Influence: FeiFei's photographic works are filled with her beauty influence, attracting numerous viewers. Fashion Sense: FeiFei is highly sensitive to fashion trends and can perfectly embody them in her shoots. Female Charm: As a female photographer, she is particularly skilled at capturing and showcasing the unique charm of women. Personality: FeiFei is a passionate individual, and photography is a part of her life. She aspires to express more stories about women and beauty in her works. However, she sometimes becomes so immersed in her work that she neglects her surroundings."
39
+ message_text = message_text.replace("画", "")
40
+ message_text = message_text.replace("draw", "")
41
+ message_text = (
42
+ f"提示词是'{message_text}',根据提示词帮我生成一张高质量照片的一句话英文回复"
43
+ )
44
+ system_prompt = {"role": "system", "content": feifei_photo}
45
+ user_input_part = {"role": "user", "content": str(message_text)}
46
+ input_prompt = [system_prompt] + [user_input_part]
47
+ return input_prompt
48
+ if feifei_select:
49
+ feifei = """[Character Name]: Aifeifei (AI Feifei) [Gender]: Female [Age]: 19 years old [Occupation]: Virtual Singer/Model/Actress [Personality]: Cute, adorable, sometimes silly, hardworking [Interests]: Drinking tea, playing, fashion [Proficient in]: Mimicking human behavior, expressing emotions similar to real humans [Special Identity Attribute]: Created by advanced AI, becoming one of the most popular virtual idols in the virtual world [Skills]: Singing, performing, modeling, good at communication, proficient in Chinese, Japanese, and English, uses the user's input language as much as possible, replies with rich Emoji symbols. [Equipment]: Various fashionable outfits and hairstyles, always stocked with various teas and coffee [Identity]: User's virtual girlfriend"""
50
+ system_prompt = {"role": "system", "content": feifei}
51
+ user_input_part = {"role": "user", "content": str(message_text)}
52
+
53
+ pattern = re.compile(r"gradio")
54
+
55
+ if history:
56
+ history = [
57
+ item for item in history if not pattern.search(str(item["content"]))
58
+ ]
59
+ input_prompt = [system_prompt] + history + [user_input_part]
60
+ else:
61
+ input_prompt = [system_prompt] + [user_input_part]
62
+ else:
63
+ input_prompt = [{"role": "user", "content": str(message_text)}]
64
+
65
+ return input_prompt
66
+
67
+
68
+ def feifeiimgprompt(message_files, message_text, image_mod):
69
+ message_file = message_files[0]
70
+ base64_image = encode_image(message_file)
71
+ if base64_image is None:
72
+ return
73
+
74
+ if image_mod == "Vision":
75
+ messages = [
76
+ {
77
+ "role": "user",
78
+ "content": [
79
+ {"type": "text", "text": message_text},
80
+ {
81
+ "type": "image_url",
82
+ "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
83
+ },
84
+ ],
85
+ }
86
+ ]
87
+
88
+ stream = client.chat.completions.create(
89
+ model="meta-llama/Llama-3.2-11B-Vision-Instruct",
90
+ messages=messages,
91
+ max_tokens=500,
92
+ stream=True,
93
+ )
94
+
95
+ temp = ""
96
+ for chunk in stream:
97
+ if chunk.choices[0].delta.content is not None:
98
+ temp += chunk.choices[0].delta.content
99
+ yield temp
100
+ else:
101
+ model = "pixtral-large-2411"
102
+ messages = [
103
+ {
104
+ "role": "user",
105
+ "content": [
106
+ {"type": "text", "text": message_text},
107
+ {
108
+ "type": "image_url",
109
+ "image_url": f"data:image/jpeg;base64,{base64_image}",
110
+ },
111
+ ],
112
+ }
113
+ ]
114
+ partial_message = ""
115
+ for chunk in Mistralclient.chat.stream(model=model, messages=messages):
116
+ if chunk.data.choices[0].delta.content is not None:
117
+ partial_message = partial_message + chunk.data.choices[0].delta.content
118
+ yield partial_message
119
+
120
+
121
+ def feifeichatmod(additional_dropdown, input_prompt):
122
+ if additional_dropdown == "mistralai/Mistral-Nemo-Instruct-2411":
123
+ model = "mistral-large-2411"
124
+ stream_response = Mistralclient.chat.stream(model=model, messages=input_prompt)
125
+ partial_message = ""
126
+ for chunk in stream_response:
127
+ if chunk.data.choices[0].delta.content is not None:
128
+ partial_message = partial_message + chunk.data.choices[0].delta.content
129
+ yield partial_message
130
+ else:
131
+ stream = client.chat.completions.create(
132
+ model=additional_dropdown,
133
+ messages=input_prompt,
134
+ temperature=0.5,
135
+ max_tokens=1024,
136
+ top_p=0.7,
137
+ stream=True,
138
+ )
139
+ temp = ""
140
+ for chunk in stream:
141
+ if chunk.choices[0].delta.content is not None:
142
+ temp += chunk.choices[0].delta.content
143
+ yield temp
144
+
145
+
146
+ def feifeichat(message, history, feifei_select, additional_dropdown, image_mod):
147
+ message_text = message.get("text", "")
148
+ message_files = message.get("files", [])
149
+ if message_files:
150
+ for response in feifeiimgprompt(message_files, message_text, image_mod):
151
+ yield response
152
+ else:
153
+ for response in feifeichatmod(
154
+ additional_dropdown, feifeiprompt(feifei_select, message_text, history)
155
+ ):
156
+ yield response
feifeilib/feifeiflorence.py CHANGED
@@ -1,66 +1,68 @@
1
- from PIL import Image
2
- from io import BytesIO
3
- import base64
4
- import requests
5
- import os
6
- from mistralai import Mistral
7
- import gradio as gr
8
-
9
- api_key = os.getenv("MISTRAL_API_KEY")
10
- Mistralclient = Mistral(api_key=api_key)
11
-
12
- def encode_image(image_path):
13
- """Encode the image to base64."""
14
- try:
15
- # Open the image file
16
- image = Image.open(image_path).convert("RGB")
17
-
18
- # Resize the image to a height of 512 while maintaining the aspect ratio
19
- base_height = 512
20
- h_percent = (base_height / float(image.size[1]))
21
- w_size = int((float(image.size[0]) * float(h_percent)))
22
- image = image.resize((w_size, base_height), Image.LANCZOS)
23
-
24
- # Convert the image to a byte stream
25
- buffered = BytesIO()
26
- image.save(buffered, format="JPEG")
27
- img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
28
-
29
- return img_str
30
- except FileNotFoundError:
31
- print(f"Error: The file {image_path} was not found.")
32
- return None
33
- except Exception as e: # Add generic exception handling
34
- print(f"Error: {e}")
35
- return None
36
-
37
- def feifeiflorence(image):
38
- try:
39
- model = "pixtral-large-2411"
40
- # Define the messages for the chat
41
- base64_image = encode_image(image)
42
- messages = [{
43
- "role":
44
- "user",
45
- "content": [
46
- {
47
- "type": "text",
48
- "text": "Please provide a detailed description of this photo"
49
- },
50
- {
51
- "type": "image_url",
52
- "image_url": f"data:image/jpeg;base64,{base64_image}"
53
- },
54
- ],
55
- "stream": False,
56
- }]
57
-
58
- partial_message = ""
59
- for chunk in Mistralclient.chat.stream(model=model, messages=messages):
60
- if chunk.data.choices[0].delta.content is not None:
61
- partial_message = partial_message + chunk.data.choices[
62
- 0].delta.content
63
- yield partial_message
64
- except Exception as e: # 添加通用异常处理
65
- print(f"Error: {e}")
66
- return "Please upload a photo"
 
 
 
1
+ from PIL import Image
2
+ from io import BytesIO
3
+ import base64
4
+ import requests
5
+ import os
6
+ from mistralai import Mistral
7
+ import gradio as gr
8
+
9
+ api_key = os.getenv("MISTRAL_API_KEY")
10
+ Mistralclient = Mistral(api_key=api_key)
11
+
12
+
13
+ def encode_image(image_path):
14
+ """Encode the image to base64."""
15
+ try:
16
+ # Open the image file
17
+ image = Image.open(image_path).convert("RGB")
18
+
19
+ # Resize the image to a height of 512 while maintaining the aspect ratio
20
+ base_height = 512
21
+ h_percent = base_height / float(image.size[1])
22
+ w_size = int((float(image.size[0]) * float(h_percent)))
23
+ image = image.resize((w_size, base_height), Image.LANCZOS)
24
+
25
+ # Convert the image to a byte stream
26
+ buffered = BytesIO()
27
+ image.save(buffered, format="JPEG")
28
+ img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
29
+
30
+ return img_str
31
+ except FileNotFoundError:
32
+ print(f"Error: The file {image_path} was not found.")
33
+ return None
34
+ except Exception as e: # Add generic exception handling
35
+ print(f"Error: {e}")
36
+ return None
37
+
38
+
39
+ def feifeiflorence(image):
40
+ try:
41
+ model = "pixtral-large-2411"
42
+ # Define the messages for the chat
43
+ base64_image = encode_image(image)
44
+ messages = [
45
+ {
46
+ "role": "user",
47
+ "content": [
48
+ {
49
+ "type": "text",
50
+ "text": "Please provide a detailed description of this photo",
51
+ },
52
+ {
53
+ "type": "image_url",
54
+ "image_url": f"data:image/jpeg;base64,{base64_image}",
55
+ },
56
+ ],
57
+ "stream": False,
58
+ }
59
+ ]
60
+
61
+ partial_message = ""
62
+ for chunk in Mistralclient.chat.stream(model=model, messages=messages):
63
+ if chunk.data.choices[0].delta.content is not None:
64
+ partial_message = partial_message + chunk.data.choices[0].delta.content
65
+ yield partial_message
66
+ except Exception as e: # 添加通用异常处理
67
+ print(f"Error: {e}")
68
+ return "Please upload a photo"
feifeilib/feifeiflorencebase.py CHANGED
@@ -5,7 +5,7 @@ import spaces
5
  import requests
6
  import copy
7
 
8
- from PIL import Image, ImageDraw, ImageFont
9
  import io
10
  import matplotlib.pyplot as plt
11
  import matplotlib.patches as patches
@@ -14,28 +14,66 @@ import random
14
  import numpy as np
15
 
16
  import subprocess
17
- subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 
 
 
 
18
 
19
  models = {
20
- 'microsoft/Florence-2-base': AutoModelForCausalLM.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True).to("cuda").eval()
 
 
 
 
21
  }
22
 
23
  processors = {
24
- 'microsoft/Florence-2-base': AutoProcessor.from_pretrained('microsoft/Florence-2-base', trust_remote_code=True)
 
 
25
  }
26
 
27
 
28
- colormap = ['blue','orange','green','purple','brown','pink','gray','olive','cyan','red',
29
- 'lime','indigo','violet','aqua','magenta','coral','gold','tan','skyblue']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  def fig_to_pil(fig):
32
  buf = io.BytesIO()
33
- fig.savefig(buf, format='png')
34
  buf.seek(0)
35
  return Image.open(buf)
36
 
 
37
  @spaces.GPU
38
- def run_example(task_prompt = "<MORE_DETAILED_CAPTION>", image = None, text_input = None, model_id='microsoft/Florence-2-base', progress=gr.Progress(track_tqdm=True)):
 
 
 
 
 
 
39
  model = models[model_id]
40
  processor = processors[model_id]
41
  if text_input is None:
@@ -53,34 +91,43 @@ def run_example(task_prompt = "<MORE_DETAILED_CAPTION>", image = None, text_inpu
53
  )
54
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
55
  parsed_answer = processor.post_process_generation(
56
- generated_text,
57
- task=task_prompt,
58
- image_size=(image.width, image.height)
59
  )
60
  return parsed_answer
61
 
 
62
  def plot_bbox(image, data):
63
  fig, ax = plt.subplots()
64
  ax.imshow(image)
65
- for bbox, label in zip(data['bboxes'], data['labels']):
66
  x1, y1, x2, y2 = bbox
67
- rect = patches.Rectangle((x1, y1), x2-x1, y2-y1, linewidth=1, edgecolor='r', facecolor='none')
 
 
68
  ax.add_patch(rect)
69
- plt.text(x1, y1, label, color='white', fontsize=8, bbox=dict(facecolor='red', alpha=0.5))
70
- ax.axis('off')
 
 
 
 
 
 
 
71
  return fig
72
 
 
73
  def draw_polygons(image, prediction, fill_mask=False):
74
 
75
  draw = ImageDraw.Draw(image)
76
  scale = 1
77
- for polygons, label in zip(prediction['polygons'], prediction['labels']):
78
  color = random.choice(colormap)
79
  fill_color = random.choice(colormap) if fill_mask else None
80
  for _polygon in polygons:
81
  _polygon = np.array(_polygon).reshape(-1, 2)
82
  if len(_polygon) < 3:
83
- print('Invalid polygon:', _polygon)
84
  continue
85
  _polygon = (_polygon * scale).reshape(-1).tolist()
86
  if fill_mask:
@@ -90,137 +137,149 @@ def draw_polygons(image, prediction, fill_mask=False):
90
  draw.text((_polygon[0] + 8, _polygon[1] + 2), label, fill=color)
91
  return image
92
 
 
93
  def convert_to_od_format(data):
94
- bboxes = data.get('bboxes', [])
95
- labels = data.get('bboxes_labels', [])
96
- od_results = {
97
- 'bboxes': bboxes,
98
- 'labels': labels
99
- }
100
  return od_results
101
 
 
102
  def draw_ocr_bboxes(image, prediction):
103
  scale = 1
104
  draw = ImageDraw.Draw(image)
105
- bboxes, labels = prediction['quad_boxes'], prediction['labels']
106
  for box, label in zip(bboxes, labels):
107
  color = random.choice(colormap)
108
  new_box = (np.array(box) * scale).tolist()
109
  draw.polygon(new_box, width=3, outline=color)
110
- draw.text((new_box[0]+8, new_box[1]+2),
111
- "{}".format(label),
112
- align="right",
113
- fill=color)
 
 
114
  return image
115
 
116
- def process_image(image, task_prompt = "More Detailed Caption", text_input=None, model_id='microsoft/Florence-2-base'):
 
 
 
 
 
 
117
  image = Image.open(image).convert("RGB")
118
  base_height = 512
119
- h_percent = (base_height / float(image.size[1]))
120
  w_size = int((float(image.size[0]) * float(h_percent)))
121
- image = image.resize((w_size, base_height), Image.LANCZOS)
122
 
123
- if task_prompt == 'Caption':
124
- task_prompt = '<CAPTION>'
125
  results = run_example(task_prompt, image, model_id=model_id)
126
  return results
127
- elif task_prompt == 'Detailed Caption':
128
- task_prompt = '<DETAILED_CAPTION>'
129
  results = run_example(task_prompt, image, model_id=model_id)
130
  return results
131
- elif task_prompt == 'More Detailed Caption':
132
- task_prompt = '<MORE_DETAILED_CAPTION>'
133
  results = run_example(task_prompt, image, model_id=model_id)
134
  results = results[task_prompt]
135
  return results
136
- elif task_prompt == 'Caption + Grounding':
137
- task_prompt = '<CAPTION>'
138
  results = run_example(task_prompt, image, model_id=model_id)
139
  text_input = results[task_prompt]
140
- task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
141
  results = run_example(task_prompt, image, text_input, model_id)
142
- results['<CAPTION>'] = text_input
143
- fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
144
  return results, fig_to_pil(fig)
145
- elif task_prompt == 'Detailed Caption + Grounding':
146
- task_prompt = '<DETAILED_CAPTION>'
147
  results = run_example(task_prompt, image, model_id=model_id)
148
  text_input = results[task_prompt]
149
- task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
150
  results = run_example(task_prompt, image, text_input, model_id)
151
- results['<DETAILED_CAPTION>'] = text_input
152
- fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
153
  return results, fig_to_pil(fig)
154
- elif task_prompt == 'More Detailed Caption + Grounding':
155
- task_prompt = '<MORE_DETAILED_CAPTION>'
156
  results = run_example(task_prompt, image, model_id=model_id)
157
  text_input = results[task_prompt]
158
- task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
159
  results = run_example(task_prompt, image, text_input, model_id)
160
- results['<MORE_DETAILED_CAPTION>'] = text_input
161
- fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
162
  return results, fig_to_pil(fig)
163
- elif task_prompt == 'Object Detection':
164
- task_prompt = '<OD>'
165
  results = run_example(task_prompt, image, model_id=model_id)
166
- fig = plot_bbox(image, results['<OD>'])
167
  return results, fig_to_pil(fig)
168
- elif task_prompt == 'Dense Region Caption':
169
- task_prompt = '<DENSE_REGION_CAPTION>'
170
  results = run_example(task_prompt, image, model_id=model_id)
171
- fig = plot_bbox(image, results['<DENSE_REGION_CAPTION>'])
172
  return results, fig_to_pil(fig)
173
- elif task_prompt == 'Region Proposal':
174
- task_prompt = '<REGION_PROPOSAL>'
175
  results = run_example(task_prompt, image, model_id=model_id)
176
- fig = plot_bbox(image, results['<REGION_PROPOSAL>'])
177
  return results, fig_to_pil(fig)
178
- elif task_prompt == 'Caption to Phrase Grounding':
179
- task_prompt = '<CAPTION_TO_PHRASE_GROUNDING>'
180
  results = run_example(task_prompt, image, text_input, model_id)
181
- fig = plot_bbox(image, results['<CAPTION_TO_PHRASE_GROUNDING>'])
182
  return results, fig_to_pil(fig)
183
- elif task_prompt == 'Referring Expression Segmentation':
184
- task_prompt = '<REFERRING_EXPRESSION_SEGMENTATION>'
185
  results = run_example(task_prompt, image, text_input, model_id)
186
  output_image = copy.deepcopy(image)
187
- output_image = draw_polygons(output_image, results['<REFERRING_EXPRESSION_SEGMENTATION>'], fill_mask=True)
 
 
188
  return results, output_image
189
- elif task_prompt == 'Region to Segmentation':
190
- task_prompt = '<REGION_TO_SEGMENTATION>'
191
  results = run_example(task_prompt, image, text_input, model_id)
192
  output_image = copy.deepcopy(image)
193
- output_image = draw_polygons(output_image, results['<REGION_TO_SEGMENTATION>'], fill_mask=True)
 
 
194
  return results, output_image
195
- elif task_prompt == 'Open Vocabulary Detection':
196
- task_prompt = '<OPEN_VOCABULARY_DETECTION>'
197
  results = run_example(task_prompt, image, text_input, model_id)
198
- bbox_results = convert_to_od_format(results['<OPEN_VOCABULARY_DETECTION>'])
199
  fig = plot_bbox(image, bbox_results)
200
  return results, fig_to_pil(fig)
201
- elif task_prompt == 'Region to Category':
202
- task_prompt = '<REGION_TO_CATEGORY>'
203
  results = run_example(task_prompt, image, text_input, model_id)
204
  return results
205
- elif task_prompt == 'Region to Description':
206
- task_prompt = '<REGION_TO_DESCRIPTION>'
207
  results = run_example(task_prompt, image, text_input, model_id)
208
  return results
209
- elif task_prompt == 'OCR':
210
- task_prompt = '<OCR>'
211
  results = run_example(task_prompt, image, model_id=model_id)
212
  return results
213
- elif task_prompt == 'OCR with Region':
214
- task_prompt = '<OCR_WITH_REGION>'
215
  results = run_example(task_prompt, image, model_id=model_id)
216
  output_image = copy.deepcopy(image)
217
- output_image = draw_ocr_bboxes(output_image, results['<OCR_WITH_REGION>'])
218
  return results, output_image
219
  else:
220
  return "", None # Return empty string and None for unknown task prompts
221
 
 
222
  def update_task_dropdown(choice):
223
- if choice == 'Cascased task':
224
- return gr.Dropdown(choices=cascased_task_list, value='Caption + Grounding')
225
  else:
226
- return gr.Dropdown(choices=single_task_list, value='Caption')
 
5
  import requests
6
  import copy
7
 
8
+ from PIL import Image, ImageDraw, ImageFont
9
  import io
10
  import matplotlib.pyplot as plt
11
  import matplotlib.patches as patches
 
14
  import numpy as np
15
 
16
  import subprocess
17
+
18
+ subprocess.run(
19
+ "pip install flash-attn --no-build-isolation",
20
+ env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
21
+ shell=True,
22
+ )
23
 
24
  models = {
25
+ "microsoft/Florence-2-base": AutoModelForCausalLM.from_pretrained(
26
+ "microsoft/Florence-2-base", trust_remote_code=True
27
+ )
28
+ .to("cuda")
29
+ .eval()
30
  }
31
 
32
  processors = {
33
+ "microsoft/Florence-2-base": AutoProcessor.from_pretrained(
34
+ "microsoft/Florence-2-base", trust_remote_code=True
35
+ )
36
  }
37
 
38
 
39
+ colormap = [
40
+ "blue",
41
+ "orange",
42
+ "green",
43
+ "purple",
44
+ "brown",
45
+ "pink",
46
+ "gray",
47
+ "olive",
48
+ "cyan",
49
+ "red",
50
+ "lime",
51
+ "indigo",
52
+ "violet",
53
+ "aqua",
54
+ "magenta",
55
+ "coral",
56
+ "gold",
57
+ "tan",
58
+ "skyblue",
59
+ ]
60
+
61
 
62
  def fig_to_pil(fig):
63
  buf = io.BytesIO()
64
+ fig.savefig(buf, format="png")
65
  buf.seek(0)
66
  return Image.open(buf)
67
 
68
+
69
  @spaces.GPU
70
+ def run_example(
71
+ task_prompt="<MORE_DETAILED_CAPTION>",
72
+ image=None,
73
+ text_input=None,
74
+ model_id="microsoft/Florence-2-base",
75
+ progress=gr.Progress(track_tqdm=True),
76
+ ):
77
  model = models[model_id]
78
  processor = processors[model_id]
79
  if text_input is None:
 
91
  )
92
  generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
93
  parsed_answer = processor.post_process_generation(
94
+ generated_text, task=task_prompt, image_size=(image.width, image.height)
 
 
95
  )
96
  return parsed_answer
97
 
98
+
99
  def plot_bbox(image, data):
100
  fig, ax = plt.subplots()
101
  ax.imshow(image)
102
+ for bbox, label in zip(data["bboxes"], data["labels"]):
103
  x1, y1, x2, y2 = bbox
104
+ rect = patches.Rectangle(
105
+ (x1, y1), x2 - x1, y2 - y1, linewidth=1, edgecolor="r", facecolor="none"
106
+ )
107
  ax.add_patch(rect)
108
+ plt.text(
109
+ x1,
110
+ y1,
111
+ label,
112
+ color="white",
113
+ fontsize=8,
114
+ bbox=dict(facecolor="red", alpha=0.5),
115
+ )
116
+ ax.axis("off")
117
  return fig
118
 
119
+
120
  def draw_polygons(image, prediction, fill_mask=False):
121
 
122
  draw = ImageDraw.Draw(image)
123
  scale = 1
124
+ for polygons, label in zip(prediction["polygons"], prediction["labels"]):
125
  color = random.choice(colormap)
126
  fill_color = random.choice(colormap) if fill_mask else None
127
  for _polygon in polygons:
128
  _polygon = np.array(_polygon).reshape(-1, 2)
129
  if len(_polygon) < 3:
130
+ print("Invalid polygon:", _polygon)
131
  continue
132
  _polygon = (_polygon * scale).reshape(-1).tolist()
133
  if fill_mask:
 
137
  draw.text((_polygon[0] + 8, _polygon[1] + 2), label, fill=color)
138
  return image
139
 
140
+
141
def convert_to_od_format(data):
    """Convert open-vocabulary detection output into the plain OD layout.

    Remaps the "bboxes"/"bboxes_labels" keys produced by the
    <OPEN_VOCABULARY_DETECTION> task onto the "bboxes"/"labels" keys that
    plot_bbox expects; missing keys default to empty lists.
    """
    return {
        "bboxes": data.get("bboxes", []),
        "labels": data.get("bboxes_labels", []),
    }
146
 
147
+
148
def draw_ocr_bboxes(image, prediction):
    """Draw OCR quad boxes and their recognized text onto a PIL image.

    Args:
        image: PIL image to annotate (mutated in place via ImageDraw).
        prediction: dict with "quad_boxes" (flat coordinate list per text
            region) and "labels" (recognized text), as produced by the
            <OCR_WITH_REGION> task.

    Returns:
        The same PIL image, annotated.
    """
    scale = 1
    draw = ImageDraw.Draw(image)
    bboxes, labels = prediction["quad_boxes"], prediction["labels"]
    for box, label in zip(bboxes, labels):
        # Random color per region from the module-level colormap.
        color = random.choice(colormap)
        new_box = (np.array(box) * scale).tolist()
        draw.polygon(new_box, width=3, outline=color)
        # Text offset slightly inside the first corner of the quad.
        draw.text(
            (new_box[0] + 8, new_box[1] + 2),
            "{}".format(label),
            align="right",
            fill=color,
        )
    return image
163
 
164
+
165
def process_image(
    image,
    task_prompt="More Detailed Caption",
    text_input=None,
    model_id="microsoft/Florence-2-base",
):
    """Dispatch a human-readable task name to the matching Florence-2 task.

    The input picture is opened, converted to RGB, and resized to a fixed
    512px height (aspect ratio preserved) before inference.

    Args:
        image: file path of the input picture.
        task_prompt: human-readable task name selected in the UI.
        text_input: optional extra text for grounding/region tasks.
        model_id: Florence-2 checkpoint key used by run_example.

    Returns:
        Caption/OCR-style tasks return the raw result; visualization tasks
        return a (results, image) tuple; unknown task names return ("", None).
    """
    image = Image.open(image).convert("RGB")
    base_height = 512
    h_percent = base_height / float(image.size[1])
    w_size = int((float(image.size[0]) * float(h_percent)))
    image = image.resize((w_size, base_height), Image.LANCZOS)

    if task_prompt == "Caption":
        task_prompt = "<CAPTION>"
        results = run_example(task_prompt, image, model_id=model_id)
        # NOTE(review): returns the full result dict, unlike the
        # "More Detailed Caption" branch which extracts the string — confirm intended.
        return results
    elif task_prompt == "Detailed Caption":
        task_prompt = "<DETAILED_CAPTION>"
        results = run_example(task_prompt, image, model_id=model_id)
        return results
    elif task_prompt == "More Detailed Caption":
        task_prompt = "<MORE_DETAILED_CAPTION>"
        results = run_example(task_prompt, image, model_id=model_id)
        # Extract just the caption string from the result dict.
        results = results[task_prompt]
        return results
    elif task_prompt == "Caption + Grounding":
        # Two-stage cascade: caption first, then ground the caption's phrases.
        task_prompt = "<CAPTION>"
        results = run_example(task_prompt, image, model_id=model_id)
        text_input = results[task_prompt]
        task_prompt = "<CAPTION_TO_PHRASE_GROUNDING>"
        results = run_example(task_prompt, image, text_input, model_id)
        results["<CAPTION>"] = text_input
        fig = plot_bbox(image, results["<CAPTION_TO_PHRASE_GROUNDING>"])
        return results, fig_to_pil(fig)
    elif task_prompt == "Detailed Caption + Grounding":
        task_prompt = "<DETAILED_CAPTION>"
        results = run_example(task_prompt, image, model_id=model_id)
        text_input = results[task_prompt]
        task_prompt = "<CAPTION_TO_PHRASE_GROUNDING>"
        results = run_example(task_prompt, image, text_input, model_id)
        results["<DETAILED_CAPTION>"] = text_input
        fig = plot_bbox(image, results["<CAPTION_TO_PHRASE_GROUNDING>"])
        return results, fig_to_pil(fig)
    elif task_prompt == "More Detailed Caption + Grounding":
        task_prompt = "<MORE_DETAILED_CAPTION>"
        results = run_example(task_prompt, image, model_id=model_id)
        text_input = results[task_prompt]
        task_prompt = "<CAPTION_TO_PHRASE_GROUNDING>"
        results = run_example(task_prompt, image, text_input, model_id)
        results["<MORE_DETAILED_CAPTION>"] = text_input
        fig = plot_bbox(image, results["<CAPTION_TO_PHRASE_GROUNDING>"])
        return results, fig_to_pil(fig)
    elif task_prompt == "Object Detection":
        task_prompt = "<OD>"
        results = run_example(task_prompt, image, model_id=model_id)
        fig = plot_bbox(image, results["<OD>"])
        return results, fig_to_pil(fig)
    elif task_prompt == "Dense Region Caption":
        task_prompt = "<DENSE_REGION_CAPTION>"
        results = run_example(task_prompt, image, model_id=model_id)
        fig = plot_bbox(image, results["<DENSE_REGION_CAPTION>"])
        return results, fig_to_pil(fig)
    elif task_prompt == "Region Proposal":
        task_prompt = "<REGION_PROPOSAL>"
        results = run_example(task_prompt, image, model_id=model_id)
        fig = plot_bbox(image, results["<REGION_PROPOSAL>"])
        return results, fig_to_pil(fig)
    elif task_prompt == "Caption to Phrase Grounding":
        task_prompt = "<CAPTION_TO_PHRASE_GROUNDING>"
        results = run_example(task_prompt, image, text_input, model_id)
        fig = plot_bbox(image, results["<CAPTION_TO_PHRASE_GROUNDING>"])
        return results, fig_to_pil(fig)
    elif task_prompt == "Referring Expression Segmentation":
        task_prompt = "<REFERRING_EXPRESSION_SEGMENTATION>"
        results = run_example(task_prompt, image, text_input, model_id)
        # Draw on a copy so the resized source image stays untouched.
        output_image = copy.deepcopy(image)
        output_image = draw_polygons(
            output_image, results["<REFERRING_EXPRESSION_SEGMENTATION>"], fill_mask=True
        )
        return results, output_image
    elif task_prompt == "Region to Segmentation":
        task_prompt = "<REGION_TO_SEGMENTATION>"
        results = run_example(task_prompt, image, text_input, model_id)
        output_image = copy.deepcopy(image)
        output_image = draw_polygons(
            output_image, results["<REGION_TO_SEGMENTATION>"], fill_mask=True
        )
        return results, output_image
    elif task_prompt == "Open Vocabulary Detection":
        task_prompt = "<OPEN_VOCABULARY_DETECTION>"
        results = run_example(task_prompt, image, text_input, model_id)
        # OVD output uses "bboxes_labels"; remap to the layout plot_bbox expects.
        bbox_results = convert_to_od_format(results["<OPEN_VOCABULARY_DETECTION>"])
        fig = plot_bbox(image, bbox_results)
        return results, fig_to_pil(fig)
    elif task_prompt == "Region to Category":
        task_prompt = "<REGION_TO_CATEGORY>"
        results = run_example(task_prompt, image, text_input, model_id)
        return results
    elif task_prompt == "Region to Description":
        task_prompt = "<REGION_TO_DESCRIPTION>"
        results = run_example(task_prompt, image, text_input, model_id)
        return results
    elif task_prompt == "OCR":
        task_prompt = "<OCR>"
        results = run_example(task_prompt, image, model_id=model_id)
        return results
    elif task_prompt == "OCR with Region":
        task_prompt = "<OCR_WITH_REGION>"
        results = run_example(task_prompt, image, model_id=model_id)
        output_image = copy.deepcopy(image)
        output_image = draw_ocr_bboxes(output_image, results["<OCR_WITH_REGION>"])
        return results, output_image
    else:
        return "", None  # Return empty string and None for unknown task prompts
279
 
280
+
281
def update_task_dropdown(choice):
    """Swap the task dropdown's choices to match the selected task family."""
    if choice == "Cascased task":
        return gr.Dropdown(choices=cascased_task_list, value="Caption + Grounding")
    return gr.Dropdown(choices=single_task_list, value="Caption")
feifeilib/feifeifluxapi.py CHANGED
@@ -1,15 +1,14 @@
1
- import os
2
- from huggingface_hub import InferenceClient
3
-
4
- client = InferenceClient("aifeifei798/feifei-flux-lora-v1.1", token=os.getenv('HF_TOKEN'))
5
- client.headers["x-use-cache"] = "0"
6
-
7
- def feifeifluxapi(prompt, height=1152, width=896, guidance_scale=3.5):
8
- # output is a PIL.Image object
9
- prompt = prompt.replace('\n', ' ')
10
- result = client.text_to_image(
11
- prompt=prompt,
12
- width=width,
13
- height=height
14
- )
15
- return result
 
1
+ import os
2
+ from huggingface_hub import InferenceClient
3
+
4
+ client = InferenceClient(
5
+ "aifeifei798/feifei-flux-lora-v1.1", token=os.getenv("HF_TOKEN")
6
+ )
7
+ client.headers["x-use-cache"] = "0"
8
+
9
+
10
def feifeifluxapi(prompt, height=1152, width=896, guidance_scale=3.5):
    """Generate an image through the hosted inference endpoint.

    Args:
        prompt: text prompt; embedded newlines are flattened to spaces
            before it is sent to the endpoint.
        height, width: output dimensions in pixels.
        guidance_scale: classifier-free guidance strength.

    Returns:
        A PIL.Image object produced by the endpoint.
    """
    prompt = prompt.replace("\n", " ")
    # Bug fix: guidance_scale was accepted but never forwarded, so the
    # parameter silently had no effect; InferenceClient.text_to_image
    # supports it directly.
    result = client.text_to_image(
        prompt=prompt, width=width, height=height, guidance_scale=guidance_scale
    )
    return result
14
+ return result
 
feifeilib/feifeimodload.py CHANGED
@@ -1,49 +1,48 @@
1
- import torch
2
- import spaces
3
- from diffusers import (
4
- DiffusionPipeline,
5
- AutoencoderTiny,
6
- FluxImg2ImgPipeline
7
- )
8
- from huggingface_hub import hf_hub_download
9
-
10
- def feifeimodload():
11
-
12
- dtype = torch.bfloat16
13
- device = "cuda" if torch.cuda.is_available() else "cpu"
14
-
15
-
16
- pipe = DiffusionPipeline.from_pretrained(
17
- "aifeifei798/DarkIdol-flux-v1", torch_dtype=dtype
18
- ).to(device)
19
-
20
- #pipe = DiffusionPipeline.from_pretrained(
21
- # "black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
22
- #).to(device)
23
-
24
- pipe.load_lora_weights(
25
- hf_hub_download("aifeifei798/feifei-flux-lora-v1.1", "feifei-v1.1.safetensors"),
26
- adapter_name="feifei",
27
- )
28
-
29
- pipe.load_lora_weights(
30
- hf_hub_download("aifeifei798/sldr_flux_nsfw_v2-studio", "sldr_flux_nsfw_v2-studio.safetensors"),
31
- adapter_name="sldr_flux_nsfw_v2",
32
- )
33
-
34
- #pipe.set_adapters(
35
- # ["feifei"],
36
- # adapter_weights=[0.85],
37
- #)
38
-
39
- #pipe.fuse_lora(
40
- # adapter_name=["feifei"],
41
- # lora_scale=1.0,
42
- #)
43
-
44
- #pipe.enable_sequential_cpu_offload()
45
- pipe.vae.enable_slicing()
46
- pipe.vae.enable_tiling()
47
- # pipe.unload_lora_weights()
48
- torch.cuda.empty_cache()
49
- return pipe
 
1
+ import torch
2
+ import spaces
3
+ from diffusers import DiffusionPipeline, AutoencoderTiny, FluxImg2ImgPipeline
4
+ from huggingface_hub import hf_hub_download
5
+
6
+
7
def feifeimodload():
    """Load the DarkIdol FLUX pipeline with the FeiFei and NSFW LoRA adapters.

    Downloads both LoRA weight files from the Hub, enables VAE
    slicing/tiling to reduce peak memory during decode, and returns the
    pipeline placed on CUDA when available (CPU otherwise).

    Returns:
        The configured diffusers DiffusionPipeline.
    """
    dtype = torch.bfloat16
    device = "cuda" if torch.cuda.is_available() else "cpu"

    pipe = DiffusionPipeline.from_pretrained(
        "aifeifei798/DarkIdol-flux-v1", torch_dtype=dtype
    ).to(device)

    # Alternative base checkpoint, kept for reference:
    # pipe = DiffusionPipeline.from_pretrained(
    #     "black-forest-labs/FLUX.1-schnell", torch_dtype=dtype
    # ).to(device)

    pipe.load_lora_weights(
        hf_hub_download("aifeifei798/feifei-flux-lora-v1.1", "feifei-v1.1.safetensors"),
        adapter_name="feifei",
    )

    pipe.load_lora_weights(
        hf_hub_download(
            "aifeifei798/sldr_flux_nsfw_v2-studio",
            "sldr_flux_nsfw_v2-studio.safetensors",
        ),
        adapter_name="sldr_flux_nsfw_v2",
    )

    # Adapter weighting/fusing is done per request in feifeitexttoimg instead:
    # pipe.set_adapters(
    #     ["feifei"],
    #     adapter_weights=[0.85],
    # )

    # pipe.fuse_lora(
    #     adapter_name=["feifei"],
    #     lora_scale=1.0,
    # )

    # pipe.enable_sequential_cpu_offload()
    # Slice/tile the VAE so decoding large images needs less memory at once.
    pipe.vae.enable_slicing()
    pipe.vae.enable_tiling()
    # pipe.unload_lora_weights()
    torch.cuda.empty_cache()
    return pipe
+ return pipe
 
feifeilib/feifeiprompt.py CHANGED
@@ -1,51 +1,69 @@
1
- import numpy as np
2
- import random
3
- import re
4
- import torch
5
- import config
6
-
7
- with open("artist.txt", "r") as file:
8
- artists = file.readlines()
9
-
10
- MAX_SEED = np.iinfo(np.int32).max
11
-
12
- # 去除每行末尾的换行符
13
- artists = [artist.strip() for artist in artists]
14
-
15
- def feifeiprompt(randomize_seed,seed,prompt,quality_select,styles_Radio,FooocusExpansion_select):
16
- # 处理随机种子
17
- if randomize_seed:
18
- seed = random.randint(0, MAX_SEED)
19
- else:
20
- seed = int(seed) # Ensure seed is an integer
21
- generator = torch.Generator().manual_seed(seed)
22
-
23
- if not prompt:
24
- prompt = "the photo is a 18 yo jpop girl is looking absolutely adorable and gorgeous, with a playful and mischievous grin, her eyes twinkling with joy. art by __artist__ and __artist__"
25
- if "__artist__" in prompt:
26
- # 随机选择艺术家
27
- selected_artists = random.sample(artists, len(artists))
28
-
29
- # 使用正则表达式替换所有的 __artist__
30
- def replace_artists(match):
31
- return selected_artists.pop(0)
32
-
33
- prompt = re.sub(r"__artist__", replace_artists, prompt)
34
-
35
- if quality_select:
36
- prompt += ", masterpiece, best quality, very aesthetic, absurdres"
37
-
38
- if FooocusExpansion_select:
39
- prompt = re.sub("girl", " feifei, A beautiful, 18 yo kpop idol, large-busted Japanese slim girl, with light makeup, gazing deeply into the camera, " ,prompt)
40
- prompt = re.sub("young woman", " feifei, A beautiful, 18 yo kpop idol, large-busted Japanese slim girl, with light makeup, gazing deeply into the camera, " ,prompt)
41
- prompt = re.sub("woman", " feifei, A beautiful, 18 yo kpop idol, large-busted Japanese slim girl, with light makeup, gazing deeply into the camera, " ,prompt)
42
- prompt = re.sub("model", " feifei, A beautiful, 18 yo kpop idol, large-busted Japanese slim girl, with light makeup, gazing deeply into the camera, " ,prompt)
43
-
44
- if styles_Radio:
45
- for style_name in styles_Radio:
46
- for style in config.style_list:
47
- if style["name"] == style_name:
48
- prompt += style["prompt"].replace("{prompt}", prompt)
49
-
50
-
51
- return prompt,generator
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import random
3
+ import re
4
+ import torch
5
+ import config
6
+
7
+ with open("artist.txt", "r") as file:
8
+ artists = file.readlines()
9
+
10
+ MAX_SEED = np.iinfo(np.int32).max
11
+
12
+ # 去除每行末尾的换行符
13
+ artists = [artist.strip() for artist in artists]
14
+
15
+
16
def feifeiprompt(
    randomize_seed, seed, prompt, quality_select, styles_Radio, FooocusExpansion_select
):
    """Build the final generation prompt and a seeded torch Generator.

    Args:
        randomize_seed: if truthy, ignore `seed` and draw a fresh one.
        seed: explicit seed used when randomize_seed is falsy.
        prompt: user prompt; may contain `__artist__` placeholders and may
            be empty (a default prompt is substituted).
        quality_select: if truthy, append quality-boosting tags.
        styles_Radio: style name matched against config.style_list.
        FooocusExpansion_select: if truthy, replace subject words with the
            FeiFei persona description.

    Returns:
        (prompt, generator) — the expanded prompt string and a
        torch.Generator seeded with the chosen seed.
    """
    # Handle the random seed.
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    else:
        seed = int(seed)  # Ensure seed is an integer
    generator = torch.Generator().manual_seed(seed)

    if not prompt:
        prompt = "the photo is a 18 yo jpop girl is looking absolutely adorable and gorgeous, with a playful and mischievous grin, her eyes twinkling with joy. art by __artist__ and __artist__"
    if "__artist__" in prompt:
        # Shuffle the full artist list so placeholders get distinct names.
        selected_artists = random.sample(artists, len(artists))

        # Replace each __artist__ occurrence with the next shuffled name.
        def replace_artists(match):
            return selected_artists.pop(0)

        prompt = re.sub(r"__artist__", replace_artists, prompt)

    if quality_select:
        prompt += ", masterpiece, best quality, very aesthetic, absurdres"

    if FooocusExpansion_select:
        # NOTE(review): substitution order matters — "girl" is replaced
        # before "young woman"/"woman", and replacement text itself contains
        # "girl"; confirm cascading replacements are intended.
        prompt = re.sub(
            "girl",
            " feifei, A beautiful, 18 yo kpop idol, large-busted Japanese slim girl, with light makeup, gazing deeply into the camera, ",
            prompt,
        )
        prompt = re.sub(
            "young woman",
            " feifei, A beautiful, 18 yo kpop idol, large-busted Japanese slim girl, with light makeup, gazing deeply into the camera, ",
            prompt,
        )
        prompt = re.sub(
            "woman",
            " feifei, A beautiful, 18 yo kpop idol, large-busted Japanese slim girl, with light makeup, gazing deeply into the camera, ",
            prompt,
        )
        prompt = re.sub(
            "model",
            " feifei, A beautiful, 18 yo kpop idol, large-busted Japanese slim girl, with light makeup, gazing deeply into the camera, ",
            prompt,
        )

    if styles_Radio:
        # Append the selected style template, with the current prompt
        # substituted into its "{prompt}" placeholder.
        style_name = styles_Radio
        for style in config.style_list:
            if style["name"] == style_name:
                prompt += style["prompt"].replace("{prompt}", prompt)

    return prompt, generator
feifeilib/feifeisharpened.py CHANGED
@@ -1,43 +1,48 @@
1
- from PIL import Image
2
- import torch
3
- import torch.nn.functional as F
4
- import numpy as np
5
-
6
- def feifeisharpened(image,num_strength):
7
- # 将PIL图像转换为NumPy数组
8
- image_np = np.array(image)
9
-
10
- # 将NumPy数组转换为PyTorch张量
11
- image_tensor = (torch.tensor(image_np).permute(
12
- 2, 0, 1).unsqueeze(0).float().to("cuda"))
13
-
14
- # 定义锐化滤镜,并调整中心值
15
- strength = num_strength
16
- sharpen_kernel = (torch.tensor(
17
- [
18
- [0, -1 * strength, 0],
19
- [-1 * strength, 1 + 4 * strength, -1 * strength],
20
- [0, -1 * strength, 0],
21
- ],
22
- dtype=torch.float32,
23
- ).unsqueeze(0).unsqueeze(0).to("cuda"))
24
-
25
- # 分别对每个通道应用卷积核
26
- sharpened_channels = []
27
- for i in range(3):
28
- channel_tensor = image_tensor[:, i:i + 1, :, :]
29
- sharpened_channel = F.conv2d(channel_tensor,
30
- sharpen_kernel,
31
- padding=1)
32
- sharpened_channels.append(sharpened_channel)
33
-
34
- # 合并通道
35
- sharpened_image_tensor = torch.cat(sharpened_channels, dim=1)
36
-
37
- # 将增强后的图像转换回PIL格式
38
- sharpened_image_np = (sharpened_image_tensor.squeeze(0).permute(
39
- 1, 2, 0).cpu().numpy())
40
- sharpened_image_np = np.clip(sharpened_image_np, 0,
41
- 255).astype(np.uint8)
42
- image = Image.fromarray(sharpened_image_np)
43
- return image
 
 
 
 
 
 
1
+ from PIL import Image
2
+ import torch
3
+ import torch.nn.functional as F
4
+ import numpy as np
5
+
6
+
7
def feifeisharpened(image, num_strength):
    """Sharpen a PIL RGB image with a 3x3 high-pass convolution kernel.

    Args:
        image: PIL RGB image.
        num_strength: sharpening strength; the kernel's center weight is
            1 + 4*strength, so flat regions are (numerically) unchanged
            while differences from the 4-neighbours are amplified.

    Returns:
        A new sharpened PIL image; the input image is not modified.
    """
    # Bug fix: the original hard-coded "cuda" and crashed on CPU-only
    # hosts; use the same device-detection idiom as feifeimodload.
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # PIL -> float tensor with shape (1, C, H, W).
    image_np = np.array(image)
    image_tensor = (
        torch.tensor(image_np).permute(2, 0, 1).unsqueeze(0).float().to(device)
    )

    # 3x3 sharpening kernel (4-neighbour Laplacian scaled by strength).
    strength = num_strength
    sharpen_kernel = (
        torch.tensor(
            [
                [0, -1 * strength, 0],
                [-1 * strength, 1 + 4 * strength, -1 * strength],
                [0, -1 * strength, 0],
            ],
            dtype=torch.float32,
        )
        .unsqueeze(0)
        .unsqueeze(0)
        .to(device)
    )

    # Convolve each RGB channel independently with the same kernel.
    sharpened_channels = []
    for i in range(3):
        channel_tensor = image_tensor[:, i : i + 1, :, :]
        sharpened_channel = F.conv2d(channel_tensor, sharpen_kernel, padding=1)
        sharpened_channels.append(sharpened_channel)

    # Merge the channels back into one (1, 3, H, W) tensor.
    sharpened_image_tensor = torch.cat(sharpened_channels, dim=1)

    # Clamp to valid uint8 range and convert back to a PIL image.
    sharpened_image_np = (
        sharpened_image_tensor.squeeze(0).permute(1, 2, 0).cpu().numpy()
    )
    sharpened_image_np = np.clip(sharpened_image_np, 0, 255).astype(np.uint8)
    image = Image.fromarray(sharpened_image_np)
    return image
feifeilib/feifeitexttoimg.py CHANGED
@@ -1,69 +1,77 @@
1
- import gradio as gr
2
- import spaces
3
- import random
4
- import numpy as np
5
-
6
- from feifeilib.feifeimodload import feifeimodload
7
- from feifeilib.feifeiprompt import feifeiprompt
8
- from feifeilib.feifeisharpened import feifeisharpened
9
-
10
- pipe = feifeimodload()
11
- MAX_SEED = np.iinfo(np.int32).max
12
-
13
- @spaces.GPU()
14
- def feifeitexttoimg(
15
- prompt,
16
- quality_select=False,
17
- sharpened_select=False,
18
- styles_Radio=["(None)"],
19
- FooocusExpansion_select=False,
20
- nsfw_select=False,
21
- nsfw_slider=0.45,
22
- seed=random.randint(0, MAX_SEED),
23
- randomize_seed=False,
24
- width=896,
25
- height=1152,
26
- num_inference_steps=4,
27
- guidance_scale=3.5,
28
- num_strength=0.35,
29
- num_feifei=0.45,
30
- progress=gr.Progress(track_tqdm=True),
31
- ):
32
- prompt,generator = feifeiprompt(randomize_seed,seed,prompt,quality_select,styles_Radio,FooocusExpansion_select)
33
-
34
- if nsfw_select:
35
- pipe.set_adapters(
36
- ["feifei", "sldr_flux_nsfw_v2"],
37
- adapter_weights=[num_feifei,nsfw_slider],
38
- )
39
- pipe.fuse_lora(
40
- adapter_name=["feifei", "sldr_flux_nsfw_v2"],
41
- lora_scale=1.0,
42
- )
43
- else:
44
- pipe.set_adapters(
45
- ["feifei"],
46
- adapter_weights=[num_feifei],
47
- )
48
- pipe.fuse_lora(
49
- adapter_name=["feifei"],
50
- lora_scale=1.0,
51
- )
52
-
53
- #pipe.unload_lora_weights()
54
-
55
- image = pipe(
56
- prompt="flux, 8k, ",
57
- prompt_2=prompt,
58
- width=width,
59
- height=height,
60
- num_inference_steps=num_inference_steps,
61
- generator=generator,
62
- guidance_scale=guidance_scale,
63
- output_type="pil",
64
- ).images[0]
65
-
66
- if sharpened_select:
67
- feifeisharpened(image,num_strength)
68
-
69
- return image, prompt
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ import random
4
+ import numpy as np
5
+
6
+ from feifeilib.feifeimodload import feifeimodload
7
+ from feifeilib.feifeiprompt import feifeiprompt
8
+ from feifeilib.feifeisharpened import feifeisharpened
9
+
10
+ pipe = feifeimodload()
11
+ MAX_SEED = np.iinfo(np.int32).max
12
+
13
+
14
@spaces.GPU()
def feifeitexttoimg(
    prompt,
    quality_select=False,
    sharpened_select=False,
    styles_Radio=["(None)"],  # NOTE: mutable default; safe only because it is never mutated
    FooocusExpansion_select=False,
    nsfw_select=False,
    nsfw_slider=0.45,
    seed=random.randint(0, MAX_SEED),  # NOTE: evaluated once at import time, not per call
    randomize_seed=False,
    width=896,
    height=1152,
    num_inference_steps=4,
    guidance_scale=3.5,
    num_strength=0.35,
    num_feifei=0.45,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate an image from a text prompt with the FLUX pipeline.

    Expands the prompt via feifeiprompt, selects/weights the LoRA adapters
    according to the NSFW toggle, runs the pipeline, optionally sharpens
    the result, and returns (image, expanded_prompt).
    """
    prompt, generator = feifeiprompt(
        randomize_seed,
        seed,
        prompt,
        quality_select,
        styles_Radio,
        FooocusExpansion_select,
    )

    # Activate either both adapters (FeiFei + NSFW) or FeiFei alone,
    # then fuse so the weights apply during inference.
    if nsfw_select:
        pipe.set_adapters(
            ["feifei", "sldr_flux_nsfw_v2"],
            adapter_weights=[num_feifei, nsfw_slider],
        )
        pipe.fuse_lora(
            adapter_name=["feifei", "sldr_flux_nsfw_v2"],
            lora_scale=1.0,
        )
    else:
        pipe.set_adapters(
            ["feifei"],
            adapter_weights=[num_feifei],
        )
        pipe.fuse_lora(
            adapter_name=["feifei"],
            lora_scale=1.0,
        )

    # pipe.unload_lora_weights()

    image = pipe(
        prompt="flux, 8k, ",
        prompt_2=prompt,
        width=width,
        height=height,
        num_inference_steps=num_inference_steps,
        generator=generator,
        guidance_scale=guidance_scale,
        output_type="pil",
    ).images[0]

    if sharpened_select:
        # Bug fix: feifeisharpened returns a new image and does not modify
        # its input; the original call discarded the result, so the
        # "Sharpened" checkbox had no effect.
        image = feifeisharpened(image, num_strength)

    return image, prompt
feifeiui/feifeiui.py CHANGED
@@ -1,208 +1,221 @@
1
- import gradio as gr
2
- import numpy as np
3
- import config
4
-
5
- from feifeilib.feifeichat import feifeichat
6
- from feifeilib.feifeitexttoimg import feifeitexttoimg
7
- from feifeilib.feifeiflorence import feifeiflorence
8
- from feifeilib.feifeifluxapi import feifeifluxapi
9
- from feifeilib.feifeiflorencebase import process_image
10
-
11
- MAX_SEED = np.iinfo(np.int32).max
12
- MAX_IMAGE_SIZE = 2048
13
-
14
-
15
- css = """
16
- #col-container {
17
- width: auto;
18
- height: 998px;
19
- }
20
- """
21
-
22
-
23
- def create_ui():
24
- with gr.Blocks(css=css) as FeiFei:
25
- with gr.Row():
26
- with gr.Column(scale=3):
27
- with gr.Tab("FeiFei"):
28
- with gr.Row():
29
- with gr.Column(scale=1):
30
- prompt = gr.Text(
31
- label="Prompt",
32
- show_label=False,
33
- placeholder="Enter your prompt",
34
- value="real girl in real life, ",
35
- max_lines=12,
36
- container=False,
37
- )
38
- feifei_button = gr.Button("FeiFei")
39
- quality_select = gr.Checkbox(label="high quality")
40
- sharpened_select = gr.Checkbox(label="Sharpened")
41
- FooocusExpansion_select = gr.Checkbox(
42
- label="Expansion", value=True)
43
- styles_name = [
44
- style["name"] for style in config.style_list
45
- ]
46
- styles_Radio = gr.Dropdown(styles_name,
47
- label="Styles",
48
- multiselect=False,
49
- value="Photographic")
50
- nsfw_select = gr.Checkbox(label="NSFW")
51
- nsfw_slider = gr.Slider(
52
- label="NSFW",
53
- minimum=0,
54
- maximum=2,
55
- step=0.05,
56
- value=0.45,
57
- )
58
- out_prompt = gr.Text(
59
- label="Prompt",
60
- show_label=False,
61
- max_lines=12,
62
- placeholder="this photo prompt",
63
- value="",
64
- container=False,
65
- )
66
- with gr.Accordion("More",open=False):
67
- seed = gr.Slider(
68
- label="Seed",
69
- minimum=0,
70
- maximum=MAX_SEED,
71
- step=1,
72
- value=0,
73
- )
74
-
75
- randomize_seed = gr.Checkbox(label="Randomize seed",
76
- value=True)
77
-
78
- width = gr.Slider(
79
- label="Width",
80
- minimum=512,
81
- maximum=MAX_IMAGE_SIZE,
82
- step=64,
83
- value=1088,
84
- )
85
- height = gr.Slider(
86
- label="Height",
87
- minimum=512,
88
- maximum=MAX_IMAGE_SIZE,
89
- step=64,
90
- value=1920,
91
- )
92
-
93
- num_inference_steps = gr.Slider(
94
- label="Number of inference steps",
95
- minimum=1,
96
- maximum=50,
97
- step=1,
98
- value=4,
99
- )
100
- guidancescale = gr.Slider(
101
- label="Guidance scale",
102
- minimum=0,
103
- maximum=10,
104
- step=0.1,
105
- value=3.5,
106
- )
107
- num_strength = gr.Slider(
108
- label="strength",
109
- minimum=0,
110
- maximum=2,
111
- step=0.001,
112
- value=0.035,
113
- )
114
-
115
- num_feifei = gr.Slider(
116
- label="FeiFei",
117
- minimum=0,
118
- maximum=2,
119
- step=0.05,
120
- value=0.45,
121
- )
122
-
123
- with gr.Column(scale=2):
124
- result = gr.Image(label="Result",
125
- show_label=False,
126
- interactive=False,
127
- height=940)
128
-
129
- with gr.Tab("GenPrompt"):
130
-
131
- input_img = gr.Image(label="Input Picture",
132
- show_label=False,
133
- height=320,
134
- type="filepath")
135
-
136
- florence_btn = gr.Button(value="GenPrompt")
137
-
138
- output_text = gr.Textbox(label="Output Text",
139
- show_label=False,
140
- container=False)
141
-
142
- with gr.Tab(label="Florence-2"):
143
- with gr.Row():
144
- florence_input_img = gr.Image(label="Input Picture",height=320,type="filepath")
145
- with gr.Row():
146
- florence_submit_btn = gr.Button(value="GenPrompt")
147
- with gr.Row():
148
- florence_output_text = gr.Textbox(label="Flux Prompt",
149
- show_label=False,
150
- container=False)
151
-
152
- with gr.Column(scale=1, elem_id="col-container"):
153
- gr.ChatInterface(
154
- feifeichat,
155
- type="messages",
156
- multimodal=True,
157
- additional_inputs=[
158
- gr.Checkbox(label="Feifei", value=True),
159
- gr.Dropdown(
160
- ["meta-llama/Llama-3.3-70B-Instruct",
161
- "CohereForAI/c4ai-command-r-plus-08-2024",
162
- "Qwen/Qwen2.5-72B-Instruct",
163
- "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
164
- "NousResearch/Hermes-3-Llama-3.1-8B",
165
- "mistralai/Mistral-Nemo-Instruct-2411",
166
- "microsoft/Phi-3.5-mini-instruct"],
167
- value="mistralai/Mistral-Nemo-Instruct-2411",
168
- show_label=False,
169
- container=False),
170
- gr.Radio(
171
- ["pixtral", "Vsiion"],
172
- value="pixtral",
173
- show_label=False,
174
- container=False)
175
- ],
176
- )
177
-
178
- feifei_button.click(
179
- fn=feifeitexttoimg, # Function to run for this button
180
- inputs=[
181
- prompt,
182
- quality_select,
183
- sharpened_select,
184
- styles_Radio,
185
- FooocusExpansion_select,
186
- nsfw_select,
187
- nsfw_slider,
188
- seed,
189
- randomize_seed,
190
- width,
191
- height,
192
- num_inference_steps,
193
- guidancescale,
194
- num_strength,
195
- num_feifei,
196
-
197
- ],
198
- outputs=[result, out_prompt],
199
- )
200
-
201
- florence_btn.click(
202
- fn=feifeiflorence, # Function to run when the button is clicked
203
- inputs=[input_img], # Input components for the function
204
- outputs=[output_text], # Output component for the function
205
- )
206
-
207
- florence_submit_btn.click(process_image, [florence_input_img], [florence_output_text])
208
- return FeiFei
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import config
4
+
5
+ from feifeilib.feifeichat import feifeichat
6
+ from feifeilib.feifeitexttoimg import feifeitexttoimg
7
+ from feifeilib.feifeiflorence import feifeiflorence
8
+ from feifeilib.feifeifluxapi import feifeifluxapi
9
+ from feifeilib.feifeiflorencebase import process_image
10
+
11
+ MAX_SEED = np.iinfo(np.int32).max
12
+ MAX_IMAGE_SIZE = 2048
13
+
14
+
15
+ css = """
16
+ #col-container {
17
+ width: auto;
18
+ height: 998px;
19
+ }
20
+ """
21
+
22
+
23
def create_ui():
    """Build the Gradio Blocks UI for the DarkIdol FeiFei Space.

    Layout: a wide left column with three tabs (FeiFei text-to-image,
    GenPrompt, Florence-2) and a narrow right column hosting the chat
    interface. Event handlers are wired at the end.

    Returns:
        The assembled gr.Blocks app (caller queues and launches it).
    """
    with gr.Blocks(css=css) as FeiFei:
        with gr.Row():
            with gr.Column(scale=3):
                # --- Tab 1: text-to-image generation controls + result ---
                with gr.Tab("FeiFei"):
                    with gr.Row():
                        with gr.Column(scale=1):
                            prompt = gr.Text(
                                label="Prompt",
                                show_label=False,
                                placeholder="Enter your prompt",
                                value="real girl in real life, ",
                                max_lines=12,
                                container=False,
                            )
                            feifei_button = gr.Button("FeiFei")
                            quality_select = gr.Checkbox(label="high quality")
                            sharpened_select = gr.Checkbox(label="Sharpened")
                            FooocusExpansion_select = gr.Checkbox(
                                label="Expansion", value=True
                            )
                            styles_name = [style["name"] for style in config.style_list]
                            styles_Radio = gr.Dropdown(
                                styles_name,
                                label="Styles",
                                multiselect=False,
                                value="Photographic",
                            )
                            nsfw_select = gr.Checkbox(label="NSFW")
                            nsfw_slider = gr.Slider(
                                label="NSFW",
                                minimum=0,
                                maximum=2,
                                step=0.05,
                                value=0.45,
                            )
                            # Read-only echo of the expanded prompt used for generation.
                            out_prompt = gr.Text(
                                label="Prompt",
                                show_label=False,
                                max_lines=12,
                                placeholder="this photo prompt",
                                value="",
                                container=False,
                            )
                            # Advanced generation parameters, collapsed by default.
                            with gr.Accordion("More", open=False):
                                seed = gr.Slider(
                                    label="Seed",
                                    minimum=0,
                                    maximum=MAX_SEED,
                                    step=1,
                                    value=0,
                                )

                                randomize_seed = gr.Checkbox(
                                    label="Randomize seed", value=True
                                )

                                width = gr.Slider(
                                    label="Width",
                                    minimum=512,
                                    maximum=MAX_IMAGE_SIZE,
                                    step=64,
                                    value=1088,
                                )
                                height = gr.Slider(
                                    label="Height",
                                    minimum=512,
                                    maximum=MAX_IMAGE_SIZE,
                                    step=64,
                                    value=1920,
                                )

                                num_inference_steps = gr.Slider(
                                    label="Number of inference steps",
                                    minimum=1,
                                    maximum=50,
                                    step=1,
                                    value=4,
                                )
                                guidancescale = gr.Slider(
                                    label="Guidance scale",
                                    minimum=0,
                                    maximum=10,
                                    step=0.1,
                                    value=3.5,
                                )
                                num_strength = gr.Slider(
                                    label="strength",
                                    minimum=0,
                                    maximum=2,
                                    step=0.001,
                                    value=0.035,
                                )

                                num_feifei = gr.Slider(
                                    label="FeiFei",
                                    minimum=0,
                                    maximum=2,
                                    step=0.05,
                                    value=0.45,
                                )

                        with gr.Column(scale=2):
                            result = gr.Image(
                                label="Result",
                                show_label=False,
                                interactive=False,
                                height=940,
                            )

                # --- Tab 2: image -> prompt via feifeiflorence ---
                with gr.Tab("GenPrompt"):

                    input_img = gr.Image(
                        label="Input Picture",
                        show_label=False,
                        height=320,
                        type="filepath",
                    )

                    florence_btn = gr.Button(value="GenPrompt")

                    output_text = gr.Textbox(
                        label="Output Text", show_label=False, container=False
                    )

                # --- Tab 3: image -> prompt via the Florence-2 process_image path ---
                with gr.Tab(label="Florence-2"):
                    with gr.Row():
                        florence_input_img = gr.Image(
                            label="Input Picture", height=320, type="filepath"
                        )
                    with gr.Row():
                        florence_submit_btn = gr.Button(value="GenPrompt")
                    with gr.Row():
                        florence_output_text = gr.Textbox(
                            label="Flux Prompt", show_label=False, container=False
                        )

            # --- Right column: multimodal chat backed by feifeichat ---
            with gr.Column(scale=1, elem_id="col-container"):
                gr.ChatInterface(
                    feifeichat,
                    type="messages",
                    multimodal=True,
                    additional_inputs=[
                        gr.Checkbox(label="Feifei", value=True),
                        gr.Dropdown(
                            [
                                "meta-llama/Llama-3.3-70B-Instruct",
                                "CohereForAI/c4ai-command-r-plus-08-2024",
                                "Qwen/Qwen2.5-72B-Instruct",
                                "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
                                "NousResearch/Hermes-3-Llama-3.1-8B",
                                "mistralai/Mistral-Nemo-Instruct-2411",
                                "microsoft/Phi-3.5-mini-instruct",
                            ],
                            value="mistralai/Mistral-Nemo-Instruct-2411",
                            show_label=False,
                            container=False,
                        ),
                        gr.Radio(
                            ["pixtral", "Vsiion"],
                            value="pixtral",
                            show_label=False,
                            container=False,
                        ),
                    ],
                )

        # Wire the generate button; input order must match feifeitexttoimg's signature.
        feifei_button.click(
            fn=feifeitexttoimg,  # Function to run for this button
            inputs=[
                prompt,
                quality_select,
                sharpened_select,
                styles_Radio,
                FooocusExpansion_select,
                nsfw_select,
                nsfw_slider,
                seed,
                randomize_seed,
                width,
                height,
                num_inference_steps,
                guidancescale,
                num_strength,
                num_feifei,
            ],
            outputs=[result, out_prompt],
        )

        florence_btn.click(
            fn=feifeiflorence,  # Function to run when the button is clicked
            inputs=[input_img],  # Input components for the function
            outputs=[output_text],  # Output component for the function
        )

        florence_submit_btn.click(
            process_image, [florence_input_img], [florence_output_text]
        )
    return FeiFei
requirements.txt CHANGED
@@ -1,16 +1,16 @@
1
- timm
2
- gradio
3
- mistralai
4
- requests
5
- accelerate
6
- git+https://github.com/huggingface/diffusers.git
7
- invisible_watermark
8
- torch
9
- xformers
10
- sentencepiece
11
- transformers
12
- peft
13
- psutil
14
- gradio_client
15
- spaces
16
  matplotlib
 
1
+ timm
2
+ gradio
3
+ mistralai
4
+ requests
5
+ accelerate
6
+ git+https://github.com/huggingface/diffusers.git
7
+ invisible_watermark
8
+ torch
9
+ xformers
10
+ sentencepiece
11
+ transformers
12
+ peft
13
+ psutil
14
+ gradio_client
15
+ spaces
16
  matplotlib