echarlaix committed
Commit d243de5 · 1 Parent(s): 6c76373

rephrase description

Files changed (1): app.py (+19 -17)
app.py CHANGED
@@ -95,11 +95,13 @@ def quantize_model(
         calibration_dataset = None
 
         is_int8 = dtype == "int8"
-        if library_name == "diffusers":
-            quant_method = "hybrid"
-        elif not is_int8 and calibration_dataset is not None:
+        # if library_name == "diffusers":
+        #     quant_method = "hybrid"
+        if not is_int8 and calibration_dataset is not None:
             quant_method = "awq"
         else:
+            if calibration_dataset is not None:
+                print("Default quantization was selected, calibration dataset won't be used")
             quant_method = "default"
 
         quantization_config = OVWeightQuantizationConfig(
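For context (not part of the commit): after this change `quant_method` is either "awq" (4-bit with a calibration dataset) or "default", and it presumably feeds the `OVWeightQuantizationConfig` built just below. A minimal sketch, assuming optimum-intel's documented parameters; the exact arguments the Space passes are outside this hunk:

```python
from optimum.intel import OVWeightQuantizationConfig

# Illustrative mapping of the Space's inputs onto the config (values assumed).
quantization_config = OVWeightQuantizationConfig(
    bits=8 if is_int8 else 4,      # weight precision picked in the UI
    quant_method=quant_method,     # "awq" or "default" after this commit
    dataset=calibration_dataset,   # consumed by data-aware methods such as AWQ
    ratio=ratio,                   # share of weights quantized to 4-bit, the rest stay 8-bit
)
```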
@@ -112,7 +114,7 @@ def quantize_model(
 
         api = HfApi(token=oauth_token.token)
         if api.repo_exists(new_repo_id) and not overwritte:
-            return f"Model {new_repo_id} already exist, please set overwritte=True to push on an existing repo"
+            return f"Model {new_repo_id} already exist, please set overwritte=True to push on an existing repository"
 
         with TemporaryDirectory() as d:
             folder = os.path.join(d, repo_folder_name(repo_id=model_id, repo_type="models"))
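A standalone sketch of the guard above, using only `huggingface_hub` (the token and repo id are placeholders):

```python
from huggingface_hub import HfApi

api = HfApi(token="hf_xxx")  # in the Space this comes from the OAuth login
# Refuse to push onto an existing repository unless the user opted in:
if api.repo_exists("username/my-model-int8-ov"):
    raise SystemExit("Repository exists; set overwritte=True to push anyway")
```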
@@ -130,7 +132,7 @@ def quantize_model(
             ov_model.save_pretrained(folder)
             new_repo_url = api.create_repo(repo_id=new_repo_id, exist_ok=True, private=private_repo)
             new_repo_id = new_repo_url.repo_id
-            print("Repo created successfully!", new_repo_url)
+            print("Repository created successfully!", new_repo_url)
 
             folder = Path(folder)
             for dir_name in (
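For reference, a hedged sketch of the two lines above the reworded print: in `huggingface_hub`, `create_repo` returns a `RepoUrl` whose `repo_id` attribute carries the normalized "namespace/name", which is presumably why the code re-reads it (identifiers below are placeholders):

```python
from huggingface_hub import HfApi

api = HfApi(token="hf_xxx")  # placeholder token
new_repo_url = api.create_repo(repo_id="my-model-int8-ov", exist_ok=True, private=False)
print("Repository created successfully!", new_repo_url)  # the message this commit rewords
new_repo_id = new_repo_url.repo_id                       # e.g. "username/my-model-int8-ov"
```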
@@ -169,9 +171,9 @@ def quantize_model(
                 card.data.base_model = model_id
                 card.text = dedent(
                     f"""
-                    This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and was exported to the OpenVINO format using [optimum-intel](https://github.com/huggingface/optimum-intel) via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space.
+                    This model is a quantized version of [`{model_id}`](https://huggingface.co/{model_id}) and is converted to the OpenVINO format. This model was obtained via the [nncf-quantization](https://huggingface.co/spaces/echarlaix/nncf-quantization) space with [optimum-intel](https://github.com/huggingface/optimum-intel).
 
-                    First make sure you have optimum-intel installed:
+                    First make sure you have `optimum-intel` installed:
 
                     ```bash
                     pip install optimum[openvino]
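The hunk ends inside the card's install snippet, and the rest of the generated card is not shown in this commit. A typical continuation of such a card is a loading example along these lines (the task class and repo id are assumptions, `OVModelForCausalLM` being one of several `OVModel*` classes in optimum-intel):

```python
from optimum.intel import OVModelForCausalLM

# Load the quantized OpenVINO model with the familiar transformers-style API.
model = OVModelForCausalLM.from_pretrained("username/my-model-int8-ov")
```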
@@ -195,16 +197,16 @@ def quantize_model(
                     path_in_repo="README.md",
                     repo_id=new_repo_id,
                 )
-                return f"This model was successfully quantized, find it under your repo {new_repo_url}"
+                return f"This model was successfully quantized, find it under your repository {new_repo_url}"
             finally:
                 shutil.rmtree(folder, ignore_errors=True)
     except Exception as e:
         return f"### Error: {e}"
 
 DESCRIPTION = """
-This Space uses [Optimum Intel](https://huggingface.co/docs/optimum/main/en/intel/openvino/optimization) to automatically apply NNCF weight only quantization on a model hosted on the [Hub](https://huggingface.co/models) and convert it to the [OpenVINO format](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) if not already.
+This Space uses [Optimum Intel](https://github.com/huggingface/optimum-intel) to automatically apply NNCF [Weight Only Quantization](https://huggingface.co/docs/optimum/main/en/intel/openvino/optimization) (WOQ) on your model and convert it to the [OpenVINO format](https://docs.openvino.ai/2024/documentation/openvino-ir-format.html) if not already.
 
-The resulting model will then be pushed under your HF user namespace. For now we only support conversion for models that are hosted on public repositories.
+After conversion, a repository will be pushed under your namespace with the resulting model.
 
 The list of the supported architectures can be found in the [documentation](https://huggingface.co/docs/optimum/main/en/intel/openvino/models)
 """
@@ -215,9 +217,9 @@ model_id = HuggingfaceHubSearch(
     search_type="model",
 )
 dtype = gr.Dropdown(
-    ["int8", "int4"],
-    value="int8",
-    label="Precision data types",
+    ["8-bit", "4-bit"],
+    value="8-bit",
+    label="Weights precision",
     filterable=False,
     visible=True,
 )
@@ -255,13 +257,13 @@ ratio = gr.Slider(
 )
 private_repo = gr.Checkbox(
     value=False,
-    label="Private Repo",
-    info="Create a private repo under your username",
+    label="Private repository",
+    info="Create a private repository instead of a public one",
 )
 overwritte = gr.Checkbox(
     value=False,
-    label="Overwrite repo content",
-    info="Enable pushing files on existing repo, potentially overwriting existing files",
+    label="Overwrite repository content",
+    info="Enable pushing files on existing repositories, potentially overwriting existing files",
 )
 interface = gr.Interface(
     fn=quantize_model,
 