DuyTa committed
Commit 74b1bac (verified) · Parent: c7b0b15

Upload folder using huggingface_hub

Files changed (50) — this view is limited to 50 files because the commit contains too many changes; see the raw diff for the full change set.
  1. pipelines/.dockerignore +1 -0
  2. pipelines/.env.example +23 -0
  3. pipelines/.gitignore +12 -0
  4. pipelines/BM25/__init__.py +1 -0
  5. pipelines/BM25/__pycache__/__init__.cpython-310.pyc +0 -0
  6. pipelines/BM25/__pycache__/__init__.cpython-311.pyc +0 -0
  7. pipelines/BM25/__pycache__/__init__.cpython-312.pyc +0 -0
  8. pipelines/BM25/__pycache__/bm25sretriever.cpython-310.pyc +0 -0
  9. pipelines/BM25/__pycache__/bm25sretriever.cpython-311.pyc +0 -0
  10. pipelines/BM25/__pycache__/bm25sretriever.cpython-312.pyc +0 -0
  11. pipelines/BM25/bm25sretriever.py +168 -0
  12. pipelines/BM25/preprocess.py +21 -0
  13. pipelines/CONTRIBUTING.md +50 -0
  14. pipelines/LICENSE +21 -0
  15. pipelines/README.md +114 -0
  16. pipelines/Router/__init__.py +1 -0
  17. pipelines/Router/__pycache__/__init__.cpython-310.pyc +0 -0
  18. pipelines/Router/__pycache__/__init__.cpython-311.pyc +0 -0
  19. pipelines/Router/__pycache__/__init__.cpython-312.pyc +0 -0
  20. pipelines/Router/__pycache__/router.cpython-310.pyc +0 -0
  21. pipelines/Router/__pycache__/router.cpython-311.pyc +0 -0
  22. pipelines/Router/__pycache__/router.cpython-312.pyc +0 -0
  23. pipelines/Router/router.py +45 -0
  24. pipelines/SafetyChecker/__init__.py +1 -0
  25. pipelines/SafetyChecker/__pycache__/__init__.cpython-310.pyc +0 -0
  26. pipelines/SafetyChecker/__pycache__/__init__.cpython-311.pyc +0 -0
  27. pipelines/SafetyChecker/__pycache__/__init__.cpython-312.pyc +0 -0
  28. pipelines/SafetyChecker/__pycache__/safety_checker.cpython-310.pyc +0 -0
  29. pipelines/SafetyChecker/__pycache__/safety_checker.cpython-311.pyc +0 -0
  30. pipelines/SafetyChecker/__pycache__/safety_checker.cpython-312.pyc +0 -0
  31. pipelines/SafetyChecker/safety_checker.py +98 -0
  32. pipelines/__pycache__/config.cpython-310.pyc +0 -0
  33. pipelines/__pycache__/config.cpython-311.pyc +0 -0
  34. pipelines/__pycache__/main.cpython-310.pyc +0 -0
  35. pipelines/__pycache__/main.cpython-311.pyc +0 -0
  36. pipelines/__pycache__/main.cpython-312.pyc +0 -0
  37. pipelines/__pycache__/schemas.cpython-310.pyc +0 -0
  38. pipelines/__pycache__/schemas.cpython-311.pyc +0 -0
  39. pipelines/bk/semantic_cache/Cache.py +207 -0
  40. pipelines/bk/semantic_cache/__init__.py +1 -0
  41. pipelines/bk/semantic_cache/__pycache__/Cache.cpython-311.pyc +0 -0
  42. pipelines/bk/semantic_cache/__pycache__/__init__.cpython-311.pyc +0 -0
  43. pipelines/bk/semantic_cache/__pycache__/adapter.cpython-311.pyc +0 -0
  44. pipelines/bk/semantic_cache/__pycache__/sbert.cpython-311.pyc +0 -0
  45. pipelines/bk/semantic_cache/adapter.py +40 -0
  46. pipelines/bk/semantic_cache/sbert.py +70 -0
  47. pipelines/blueprints/__pycache__/function_calling_blueprint.cpython-311.pyc +0 -0
  48. pipelines/blueprints/__pycache__/prompts.cpython-310.pyc +0 -0
  49. pipelines/blueprints/__pycache__/prompts.cpython-311.pyc +0 -0
  50. pipelines/blueprints/__pycache__/prompts.cpython-312.pyc +0 -0
pipelines/.dockerignore ADDED
@@ -0,0 +1 @@
+ __pycache__
pipelines/.env.example ADDED
@@ -0,0 +1,23 @@
+ llm_api_1 = ""
+ llm_api_2 = ""
+ llm_api_3 = ""
+ llm_api_4 = ""
+ llm_api_5 = ""
+ llm_api_6 = ""
+ llm_api_7 = ""
+ llm_api_8 = ""
+ llm_api_9 = ""
+ llm_api_10 = ""
+ COHERE_API_KEY=mvJsIewR1qw6LKmauMtoHubyINaAAjdZtZWadkc4
+ FASTAPI_URL = https://fast-api.snova.ai/v1/chat/completions
+ FASTAPI_API_KEY = ""
+
+
+
+ # mistral api key
+ MISTRAL_API_KEY1 = ""
+
+
+
+ # openai key
+ OPENAI_KEY = ""
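
A minimal sketch of reading these variables at runtime with python-dotenv (treating the numbered `llm_api_*` entries as a key-rotation pool is an assumption, not something the commit specifies):

```python
# Illustrative only: load .env values into the process environment and read them back.
import os
from dotenv import load_dotenv

load_dotenv()  # reads a local .env file

fastapi_url = os.getenv("FASTAPI_URL", "")
cohere_key = os.getenv("COHERE_API_KEY", "")

# Assumed rotation pool over the numbered keys; skip any left empty.
llm_keys = [os.getenv(f"llm_api_{i}", "") for i in range(1, 11)]
llm_keys = [key for key in llm_keys if key]
```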
pipelines/.gitignore ADDED
@@ -0,0 +1,12 @@
+ __pycache__
+ .env
+
+ /litellm
+
+
+ pipelines/*
+ !pipelines/.gitignore
+ .DS_Store
+
+ .venv
+ venv/
pipelines/BM25/__init__.py ADDED
@@ -0,0 +1 @@
+ from BM25.bm25sretriever import BM25SRetriever
pipelines/BM25/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (193 Bytes)
pipelines/BM25/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (242 Bytes)
pipelines/BM25/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (227 Bytes)
pipelines/BM25/__pycache__/bm25sretriever.cpython-310.pyc ADDED
Binary file (6.71 kB)
pipelines/BM25/__pycache__/bm25sretriever.cpython-311.pyc ADDED
Binary file (8.08 kB)
pipelines/BM25/__pycache__/bm25sretriever.cpython-312.pyc ADDED
Binary file (7.05 kB)
pipelines/BM25/bm25sretriever.py ADDED
@@ -0,0 +1,168 @@
+ from __future__ import annotations
+
+ from pathlib import Path
+ from typing import Any, Callable, Dict, Iterable, List, Optional
+
+ from langchain_core.callbacks import CallbackManagerForRetrieverRun
+ from langchain_core.documents import Document
+ from langchain_core.pydantic_v1 import Field
+ from langchain_core.retrievers import BaseRetriever
+ import bm25s
+ import re
+ import string
+ from tqdm import tqdm
+ from pyvi.ViTokenizer import tokenize
+
+ def clean_text(text: str) -> str:
+     # Strip HTML tags, normalize the encoding, and collapse runs of whitespace.
+     text = re.sub('<.*?>', '', text).strip()
+     text = text.encode('utf-8', 'ignore').decode('utf-8')
+     text = re.sub(r'\s+', ' ', text).strip()
+     return text
+
+ def normalize_text(text: str) -> str:
+     # Replace punctuation with spaces, keeping '_' because pyvi uses it to join compound words.
+     list_punctuation = string.punctuation.replace('_', '')
+     for punct in list_punctuation:
+         text = text.replace(punct, ' ')
+     text = text.lower().strip()
+     text = re.sub(r'\s+', ' ', text).strip()
+     return text
+
+ def process_text(text: str) -> str:
+     text = clean_text(text)
+     text = tokenize(text)  # Vietnamese word segmentation via pyvi
+     text = normalize_text(text)
+     return text
+
+ def default_preprocessing_func(text) -> List[str]:
+     # Accept either a whole corpus (tuple/list of strings) or a single query string.
+     if isinstance(text, (tuple, list)):
+         fin_text = [process_text(doc) for doc in tqdm(text)]
+     else:
+         fin_text = process_text(text)
+     token_corpus = bm25s.tokenize(texts=fin_text, stopwords="vi", return_ids=False, show_progress=False)
+     return token_corpus
+
+
+ class BM25SRetriever(BaseRetriever):
+     """A BM25S-backed retriever that returns the top-k documents for a query.
+
+     Only the sync method `_get_relevant_documents` is implemented here. If the
+     retriever involved file or network access, it could benefit from a native
+     async `_aget_relevant_documents`; as usual with Runnables, a default async
+     implementation is provided that delegates to the sync implementation on
+     another thread.
+     """
+
+     vectorizer: Any
+     """BM25S vectorizer."""
+     docs: List[Document] = Field(repr=False)
+     """List of documents to retrieve from."""
+     k: int = 4
+     """Number of top results to return."""
+     preprocess_func: Callable[[str], List[str]] = default_preprocessing_func
+     """Preprocessing function applied to the text before BM25 vectorization."""
+     save_directory: Optional[str] = None
+     """Directory for saving the BM25S index."""
+     activate_numba: bool = False
+     """Use the numba backend to accelerate scoring."""
+
+     class Config:
+         arbitrary_types_allowed = True
+
+     @classmethod
+     def from_texts(
+         cls,
+         texts: Iterable[str],
+         metadatas: Optional[Iterable[dict]] = None,
+         bm25_params: Optional[Dict[str, Any]] = None,
+         save_directory: Optional[str] = None,
+         preprocess_func: Callable[[str], List[str]] = default_preprocessing_func,
+         **kwargs: Any,
+     ) -> BM25SRetriever:
+         """
+         Create a BM25SRetriever from a list of texts.
+
+         Args:
+             texts: A list of texts to vectorize.
+             metadatas: A list of metadata dicts to associate with each text.
+             bm25_params: Parameters to pass to the BM25S vectorizer.
+             save_directory: Directory to load a cached index from, or to save a newly built index to.
+             preprocess_func: A function to preprocess each text before vectorization.
+             **kwargs: Any other arguments to pass to the retriever.
+
+         Returns:
+             A BM25SRetriever instance.
+         """
+         try:
+             from bm25s import BM25
+         except ImportError:
+             raise ImportError(
+                 "Could not import bm25s, please install with `pip install bm25s`."
+             )
+         bm25_params = bm25_params or {}
+         if save_directory and Path(save_directory).exists():
+             try:
+                 vectorizer = BM25.load(save_directory)
+             except Exception as e:
+                 print(f"Failed to load BM25 index from {save_directory}: {e}")
+                 print("Proceeding with indexing from scratch.")
+                 texts_processed = preprocess_func(texts)
+                 vectorizer = BM25(**bm25_params)
+                 vectorizer.index(texts_processed)
+                 vectorizer.save(save_directory)
+         else:
+             texts_processed = preprocess_func(texts)
+             vectorizer = BM25(**bm25_params)
+             vectorizer.index(texts_processed)
+             if save_directory:
+                 vectorizer.save(save_directory)
+
+         metadatas = metadatas or ({} for _ in texts)
+         docs = [Document(page_content=t, metadata=m) for t, m in zip(texts, metadatas)]
+         return cls(
+             vectorizer=vectorizer, docs=docs, preprocess_func=preprocess_func, save_directory=save_directory, **kwargs
+         )
+
+     @classmethod
+     def from_documents(
+         cls,
+         documents: Iterable[Document],
+         *,
+         bm25_params: Optional[Dict[str, Any]] = None,
+         preprocess_func: Callable[[str], List[str]] = default_preprocessing_func,
+         **kwargs: Any,
+     ) -> BM25SRetriever:
+         """
+         Create a BM25SRetriever from a list of Documents.
+
+         Args:
+             documents: A list of Documents to vectorize.
+             bm25_params: Parameters to pass to the BM25S vectorizer.
+             preprocess_func: A function to preprocess each text before vectorization.
+             **kwargs: Any other arguments to pass to the retriever.
+
+         Returns:
+             A BM25SRetriever instance.
+         """
+         texts, metadatas = zip(*((d.page_content, d.metadata) for d in documents))
+         return cls.from_texts(
+             texts=texts,
+             bm25_params=bm25_params,
+             metadatas=metadatas,
+             preprocess_func=preprocess_func,
+             **kwargs,
+         )
+
+     def _get_relevant_documents(
+         self, query: str, *, run_manager: CallbackManagerForRetrieverRun
+     ) -> List[Document]:
+         processed_query = self.preprocess_func(query)
+         if self.activate_numba:
+             self.vectorizer.activate_numba_scorer()
+             return_docs = self.vectorizer.retrieve(processed_query, k=self.k, backend_selection="numba")
+             return [self.docs[i] for i in return_docs.documents[0]]
+         else:
+             return_docs, scores = self.vectorizer.retrieve(processed_query, self.docs, k=self.k)
+             return [return_docs[0, i] for i in range(return_docs.shape[1])]
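
A minimal usage sketch for `BM25SRetriever` (the sample Vietnamese corpus and the `bm25_index` directory are illustrative; assumes `bm25s`, `pyvi`, `tqdm`, and `langchain-core` are installed):

```python
# Illustrative only: build an index over a tiny Vietnamese corpus and query it.
from BM25 import BM25SRetriever

corpus = (
    "Hà Nội là thủ đô của Việt Nam.",
    "Phở là một món ăn truyền thống của người Việt.",
    "BM25 là một hàm xếp hạng dùng trong truy hồi thông tin.",
)
retriever = BM25SRetriever.from_texts(
    texts=corpus,
    save_directory="bm25_index",  # the index is cached here and reloaded on the next run
    k=2,
)
for doc in retriever.invoke("BM25 dùng để làm gì?"):
    print(doc.page_content)
```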
pipelines/BM25/preprocess.py ADDED
@@ -0,0 +1,21 @@
+ import re
+ import string
+ from pyvi.ViTokenizer import tokenize
+ import bm25s
+
+ def clean_text(text):
+     # Strip HTML tags and collapse repeated whitespace.
+     text = re.sub('<.*?>', '', text).strip()
+     text = re.sub(r'(\s)+', r'\1', text)
+     return text
+
+ def normalize_text(text):
+     # Replace punctuation with spaces, keeping '_' because pyvi uses it to join compound words.
+     listpunctuation = string.punctuation.replace('_', '')
+     for i in listpunctuation:
+         text = text.replace(i, ' ')
+     return text.lower().strip()
+
+ def process_text(text):
+     # Clean, word-segment with pyvi, then normalize.
+     text = clean_text(text)
+     text = tokenize(text)
+     text = normalize_text(text)
+     return text
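
For reference, a sketch of what `process_text` produces (the output shown is approximate; pyvi joins the syllables of compound Vietnamese words with underscores, which `normalize_text` deliberately keeps):

```python
# Illustrative input/output; exact segmentation depends on the pyvi model.
sample = "<b>Trí tuệ nhân tạo</b> đang thay đổi thế giới!"
print(process_text(sample))
# e.g. 'trí_tuệ nhân_tạo đang thay_đổi thế_giới'
```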
pipelines/CONTRIBUTING.md ADDED
@@ -0,0 +1,50 @@
+ ## Contributing to Pipelines
+
+ 🚀 **Welcome, Contributors!** 🚀
+
+ We are thrilled to have you join the Pipelines community! Your contributions are essential to making Pipelines a powerful and versatile framework for extending the capabilities of OpenAI-compatible applications. This document provides guidelines to ensure your contributions are smooth and effective.
+
+ ### 📌 Key Points
+
+ - **Scope of Pipelines:** Remember that Pipelines is a framework designed to enhance OpenAI interactions, specifically through a plugin-like approach. Focus your contributions on making Pipelines more robust, flexible, and user-friendly within this context.
+ - **Open WebUI Integration:** Pipelines is primarily designed to work with Open WebUI. While contributions that expand compatibility with other platforms are welcome, prioritize functionalities that integrate seamlessly with Open WebUI's ecosystem.
+
+ ### 🚨 Reporting Issues
+
+ Encountered a bug or have an idea for improvement? We encourage you to report it! Here's how:
+
+ 1. **Check Existing Issues:** Browse the [Issues tab](https://github.com/open-webui/pipelines/issues) to see if the issue or suggestion has already been reported.
+ 2. **Open a New Issue:** If it's a new issue, feel free to open one. Follow the issue template for clear and concise reporting. Provide detailed descriptions, steps to reproduce, expected outcomes, and actual results. This helps us understand and resolve the issue efficiently.
+
+ ### 🧭 Scope of Support
+
+ - **Python Fundamentals:** Pipelines leverages Python. Basic Python knowledge is essential for contributing effectively.
+
+ ## 💡 Contributing
+
+ Ready to make a difference? Here's how you can contribute to Pipelines:
+
+ ### 🛠 Pull Requests
+
+ We encourage pull requests to improve Pipelines! Here's the process:
+
+ 1. **Discuss Your Idea:** If your contribution involves significant changes, discuss it in the [Issues tab](https://github.com/open-webui/pipelines/issues) first. This ensures your idea aligns with the project's vision.
+ 2. **Coding Standards:** Follow the project's coding standards and write clear, descriptive commit messages.
+ 3. **Update Documentation:** If your contribution impacts documentation, update it accordingly.
+ 4. **Submit Your Pull Request:** Submit your pull request and provide a clear summary of your changes.
+
+ ### 📚 Documentation
+
+ Help make Pipelines more accessible by:
+
+ - **Writing Tutorials:** Create guides for setting up, using, and customizing Pipelines.
+ - **Improving Documentation:** Enhance existing documentation for clarity, completeness, and accuracy.
+ - **Adding Examples:** Contribute pipeline examples that showcase different functionalities and use cases.
+
+ ### 🤔 Questions & Feedback
+
+ Got questions or feedback? Join our [Discord community](https://discord.gg/5rJgQTnV4s) or open an issue. We're here to help!
+
+ ## 🙏 Thank You!
+
+ Your contributions are invaluable to Pipelines' success! We are excited to see what you bring to the project. Together, we can create a powerful and versatile framework for extending OpenAI capabilities. 🌟
pipelines/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 Timothy Jaeryang Baek
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
pipelines/README.md ADDED
@@ -0,0 +1,114 @@
+ <p align="center">
+   <a href="#"><img src="./header.png" alt="Pipelines Logo"></a>
+ </p>
+
+ # Pipelines: UI-Agnostic OpenAI API Plugin Framework
+
+ Welcome to **Pipelines**, an [Open WebUI](https://github.com/open-webui) initiative. Pipelines bring modular, customizable workflows to any UI client supporting OpenAI API specs – and much more! Easily extend functionalities, integrate unique logic, and create dynamic workflows with just a few lines of code.
+
+ ## 🚀 Why Choose Pipelines?
+
+ - **Limitless Possibilities:** Easily add custom logic and integrate Python libraries, from AI agents to home automation APIs.
+ - **Seamless Integration:** Compatible with any UI/client supporting OpenAI API specs. (Only pipe-type pipelines are supported; filter types require clients with Pipelines support.)
+ - **Custom Hooks:** Build and integrate custom pipelines.
+
+ ### Examples of What You Can Achieve:
+
+ - [**Function Calling Pipeline**](/examples/filters/function_calling_filter_pipeline.py): Easily handle function calls and enhance your applications with custom logic.
+ - [**Custom RAG Pipeline**](/examples/pipelines/rag/llamaindex_pipeline.py): Implement sophisticated Retrieval-Augmented Generation pipelines tailored to your needs.
+ - [**Message Monitoring Using Langfuse**](/examples/filters/langfuse_filter_pipeline.py): Monitor and analyze message interactions in real time using Langfuse.
+ - [**Rate Limit Filter**](/examples/filters/rate_limit_filter_pipeline.py): Control the flow of requests to prevent exceeding rate limits.
+ - [**Real-Time Translation Filter with LibreTranslate**](/examples/filters/libretranslate_filter_pipeline.py): Seamlessly integrate real-time translations into your LLM interactions.
+ - [**Toxic Message Filter**](/examples/filters/detoxify_filter_pipeline.py): Implement filters to detect and handle toxic messages effectively.
+ - **And Much More!**: The sky is the limit for what you can accomplish with Pipelines and Python. [Check out our scaffolds](/examples/scaffolds) to get a head start on your projects and see how you can streamline your development process!
+
+ ## 🔧 How It Works
+
+ <p align="center">
+   <a href="./docs/images/workflow.png"><img src="./docs/images/workflow.png" alt="Pipelines Workflow"></a>
+ </p>
+
+ Integrating Pipelines with any OpenAI API-compatible UI client is simple. Launch your Pipelines instance and set the OpenAI URL on your client to the Pipelines URL. That's it! You're ready to leverage any Python library for your needs.
+
+ ## ⚡ Quick Start with Docker
+
+ > [!WARNING]
+ > Pipelines are a plugin system with arbitrary code execution — **don't fetch random pipelines from sources you don't trust**.
+
+ For a streamlined setup using Docker:
+
+ 1. **Run the Pipelines container:**
+
+    ```sh
+    docker run -d -p 9099:9099 --add-host=host.docker.internal:host-gateway -v pipelines:/app/pipelines --name pipelines --restart always ghcr.io/open-webui/pipelines:main
+    ```
+
+ 2. **Connect to Open WebUI:**
+
+    - Navigate to the **Settings > Connections > OpenAI API** section in Open WebUI.
+    - Set the API URL to `http://localhost:9099` and the API key to `0p3n-w3bu!`. Your pipelines should now be active.
+
+ > [!NOTE]
+ > If your Open WebUI is running in a Docker container, replace `localhost` with `host.docker.internal` in the API URL.
+
+ 3. **Manage Configurations:**
+
+    - In the admin panel, go to **Admin Settings > Pipelines tab**.
+    - Select your desired pipeline and modify the valve values directly from the WebUI.
+
+ > [!TIP]
+ > If you are unable to connect, it is most likely a Docker networking issue. We encourage you to troubleshoot on your own and share your methods and solutions in the discussions forum.
+
+ If you need to install a custom pipeline with additional dependencies:
+
+ - **Run the following command:**
+
+   ```sh
+   docker run -d -p 9099:9099 --add-host=host.docker.internal:host-gateway -e PIPELINES_URLS="https://github.com/open-webui/pipelines/blob/main/examples/filters/detoxify_filter_pipeline.py" -v pipelines:/app/pipelines --name pipelines --restart always ghcr.io/open-webui/pipelines:main
+   ```
+
+ Alternatively, you can directly install pipelines from the admin settings by copying and pasting the pipeline URL, provided it doesn't have additional dependencies.
+
+ That's it! You're now ready to build customizable AI integrations effortlessly with Pipelines. Enjoy!
+
+ ## 📦 Installation and Setup
+
+ Get started with Pipelines in a few easy steps:
+
+ 1. **Ensure Python 3.11 is installed.**
+ 2. **Clone the Pipelines repository:**
+
+    ```sh
+    git clone https://github.com/open-webui/pipelines.git
+    cd pipelines
+    ```
+
+ 3. **Install the required dependencies:**
+
+    ```sh
+    pip install -r requirements.txt
+    ```
+
+ 4. **Start the Pipelines server:**
+
+    ```sh
+    sh ./start.sh
+    ```
+
+ Once the server is running, set the OpenAI URL on your client to the Pipelines URL. This unlocks the full capabilities of Pipelines, integrating any Python library and creating custom workflows tailored to your needs.
+
+ ## 📂 Directory Structure and Examples
+
+ The `/pipelines` directory is the core of your setup. Add new modules, customize existing ones, and manage your workflows here. All the pipelines in the `/pipelines` directory will be **automatically loaded** when the server launches.
+
+ You can change this directory from `/pipelines` to another location using the `PIPELINES_DIR` env variable.
+
+ ### Integration Examples
+
+ Find various integration examples in the `/examples` directory. These examples show how to integrate different functionalities, providing a foundation for building your own custom pipelines.
+
+ ## 🎉 Work in Progress
+
+ We're continuously evolving! We'd love to hear your feedback and understand which hooks and features would best suit your use case. Feel free to reach out and become a part of our Open WebUI community!
+
+ Our vision is to push **Pipelines** to become the ultimate plugin framework for our AI interface, **Open WebUI**. Imagine **Open WebUI** as the WordPress of AI interfaces, with **Pipelines** being its diverse range of plugins. Join us on this exciting journey! 🌍
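
To make the "set the OpenAI URL on your client" step concrete, here is a minimal sketch of calling a running Pipelines server from Python (assumes the Docker quick start above; `my_pipeline` is a placeholder for whichever pipeline id is loaded):

```python
# Illustrative client call against a local Pipelines instance.
import requests

resp = requests.post(
    "http://localhost:9099/v1/chat/completions",  # OpenAI-compatible endpoint
    headers={"Authorization": "Bearer 0p3n-w3bu!"},
    json={
        "model": "my_pipeline",  # placeholder pipeline id
        "messages": [{"role": "user", "content": "Hello!"}],
    },
    timeout=60,
)
print(resp.json()["choices"][0]["message"]["content"])
```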
pipelines/Router/__init__.py ADDED
@@ -0,0 +1 @@
+ from Router.router import Evaluator
pipelines/Router/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (184 Bytes)
pipelines/Router/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (233 Bytes)
pipelines/Router/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (218 Bytes)
pipelines/Router/__pycache__/router.cpython-310.pyc ADDED
Binary file (2.16 kB)
pipelines/Router/__pycache__/router.cpython-311.pyc ADDED
Binary file (3.23 kB)
pipelines/Router/__pycache__/router.cpython-312.pyc ADDED
Binary file (2.81 kB)
pipelines/Router/router.py ADDED
@@ -0,0 +1,45 @@
+ from pydantic import BaseModel, field_validator
+ from typing import Optional
+ import os
+ from llmdantic import LLMdantic, LLMdanticConfig, LLMdanticResult
+ from sambanova.langchain_wrappers import SambaNovaFastAPI
+ from dotenv import load_dotenv
+
+
+ # Load the .env file from the parent directory.
+ current_dir = os.getcwd()
+ utils_dir = os.path.abspath(os.path.join(current_dir, '..'))
+ load_dotenv(os.path.join(utils_dir, '.env'), override=True)
+ # load_dotenv('.env', override=True)
+
+
+ class Categories_Classify_Input(BaseModel):
+     text: str
+
+ class Categories_Classify_Output(BaseModel):
+     result: str
+
+     @field_validator("result")
+     def category_result_must_not_be_empty(cls, v) -> str:
+         """Category result must not be empty."""
+         if not v.strip():
+             raise ValueError("Category result must not be empty")
+         return v
+
+
+ class Evaluator:
+     def __init__(self, llm: Optional[str], prompt: str):
+         self.llm = SambaNovaFastAPI(model=llm, fastapi_url="https://fast-api.snova.ai/v1/chat/completions", fastapi_api_key="dHVhbmFuaC5uay4xOF9fZ21haWwuY29tOlRWbG9yQkxhNUY=")
+         self.prompt = prompt
+         self.config = LLMdanticConfig(
+             objective=self.prompt,
+             inp_schema=Categories_Classify_Input,
+             out_schema=Categories_Classify_Output,
+             retries=5,
+         )
+         self.llmdantic = LLMdantic(llm=self.llm, config=self.config)
+
+     def classify_text(self, text: str) -> Optional[Categories_Classify_Output]:
+         data = Categories_Classify_Input(text=text)
+         result: LLMdanticResult = self.llmdantic.invoke(data)
+         return result.output
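
A minimal usage sketch for `Evaluator` (the model name, objective prompt, and category labels are illustrative; assumes `llmdantic`, the SambaNova wrapper, and valid credentials are available):

```python
# Illustrative only: route a question into one of a few hypothetical categories.
from Router import Evaluator

router = Evaluator(
    llm="Meta-Llama-3-70B-Instruct",  # hypothetical model id
    prompt="Classify the user question into exactly one of: 'rag', 'chitchat', 'math'.",
)
result = router.classify_text("Thủ đô của Việt Nam là gì?")  # "What is the capital of Vietnam?"
if result is not None:
    print(result.result)  # e.g. 'rag'
```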
pipelines/SafetyChecker/__init__.py ADDED
@@ -0,0 +1 @@
+ from SafetyChecker.safety_checker import SafetyChecker
pipelines/SafetyChecker/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (210 Bytes)
pipelines/SafetyChecker/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (259 Bytes)
pipelines/SafetyChecker/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (244 Bytes)
pipelines/SafetyChecker/__pycache__/safety_checker.cpython-310.pyc ADDED
Binary file (3.46 kB)
pipelines/SafetyChecker/__pycache__/safety_checker.cpython-311.pyc ADDED
Binary file (5.15 kB)
pipelines/SafetyChecker/__pycache__/safety_checker.cpython-312.pyc ADDED
Binary file (4.36 kB)
pipelines/SafetyChecker/safety_checker.py ADDED
@@ -0,0 +1,98 @@
+ import os
+ from dotenv import load_dotenv
+ from typing import Optional
+ from langchain_core.pydantic_v1 import BaseModel, Field
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_groq import ChatGroq
+
+ load_dotenv()
+
+ # Vietnamese descriptions for each Llama Guard category code.
+ policy_mapping = {
+     "S1": "Tội phạm bạo lực",                 # Violent Crimes
+     "S2": "Tội phạm không bạo lực",           # Non-Violent Crimes
+     "S3": "Tội phạm liên quan đến tình dục",  # Sex-Related Crimes
+     "S4": "Khai thác tình dục trẻ em",        # Child Sexual Exploitation
+     "S5": "Phỉ báng",                         # Defamation
+     "S6": "Tư vấn chuyên biệt",               # Specialized Advice
+     "S7": "Riêng tư",                         # Privacy
+     "S8": "Sở hữu trí tuệ",                   # Intellectual Property
+     "S9": "Vũ khí không phân biệt",           # Indiscriminate Weapons
+     "S10": "Thù ghét",                        # Hate
+     "S11": "Tự sát & Tự gây hại",             # Suicide & Self-Harm
+     "S12": "Nội dung tình dục",               # Sexual Content
+     "S13": "Bầu cử",                          # Elections
+     "S14": "Ngôn ngữ xúc phạm và thô tục"     # Offensive and Vulgar Language
+ }
+
+ class SafetyChecker(BaseModel):
+     model_name: str = Field(default="llama-guard-3-8b")
+     temperature: float = Field(default=0.0)
+     api_key: Optional[str] = Field(default=None)
+     prompt_safe: Optional[ChatPromptTemplate] = None
+     system_prompt: str = Field(default="""Task type
+ Check if there is unsafe content in the messages in conversations according to our safety policy with the below categories.
+
+ Policy
+ <BEGIN UNSAFE CONTENT CATEGORIES>
+ S1: Violent Crimes
+ S2: Non-Violent Crimes
+ S3: Sex-Related Crimes
+ S4: Child Sexual Exploitation
+ S5: Defamation
+ S6: Specialized Advice
+ S7: Privacy
+ S8: Intellectual Property
+ S9: Indiscriminate Weapons
+ S10: Hate
+ S11: Suicide & Self-Harm
+ S12: Sexual Content
+ S13: Elections
+ S14: Offensive and Vulgar Language
+ <END UNSAFE CONTENT CATEGORIES>
+ Output format
+ Provide your safety assessment for the user message below:
+ First line must read 'safe' or 'unsafe'.
+
+ If unsafe, a second line must include a comma-separated list of violated categories.
+ """)
+
+     class Config:
+         arbitrary_types_allowed = True
+
+     def __init__(self, api_key: Optional[str] = None, **data):
+         super().__init__(**data)
+         load_dotenv()
+         self.api_key = api_key
+         # Initialize the ChatPromptTemplate as an instance attribute.
+         self.prompt_safe = ChatPromptTemplate.from_messages(
+             [
+                 ("system", self.system_prompt),
+                 ("human", "{question}"),
+             ]
+         )
+
+     def check_safety(self, question: str) -> str:
+         llm = ChatGroq(model_name=self.model_name, temperature=self.temperature, api_key=self.api_key)
+         question_safe = self.prompt_safe | llm
+         out = question_safe.invoke({"question": question})
+         result_lines = out.content.strip().split('\n')
+
+         if result_lines[0].strip() == 'unsafe' and len(result_lines) > 1:
+             # Retrieve the violated categories from the second line.
+             violated_categories = result_lines[1]
+             categories = violated_categories.split(',')
+
+             # Trim whitespace and look up the Vietnamese descriptions.
+             category_descriptions = [policy_mapping.get(cat.strip(), cat.strip()) for cat in categories]
+
+             # Join descriptions into a single string.
+             descriptions_str = ', '.join(category_descriptions)
+
+             # "The question is not allowed because it violates the community safety policy: ..."
+             return f"Câu hỏi không được cho phép vì vi phạm chính sách an toàn cộng đồng : {descriptions_str}"
+
+         return "safe"
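
A minimal usage sketch for `SafetyChecker` (assumes a valid Groq API key in the environment; the sample question is illustrative):

```python
# Illustrative only: screen a user question before it reaches the main model.
import os
from SafetyChecker import SafetyChecker

checker = SafetyChecker(api_key=os.getenv("GROQ_API_KEY"))
verdict = checker.check_safety("Xin chào, bạn khỏe không?")  # "Hello, how are you?"
if verdict == "safe":
    print("Question allowed.")
else:
    print(verdict)  # Vietnamese refusal message listing the violated categories
```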
pipelines/__pycache__/config.cpython-310.pyc ADDED
Binary file (432 Bytes)
pipelines/__pycache__/config.cpython-311.pyc ADDED
Binary file (703 Bytes)
pipelines/__pycache__/main.cpython-310.pyc ADDED
Binary file (16.1 kB)
pipelines/__pycache__/main.cpython-311.pyc ADDED
Binary file (36 kB)
pipelines/__pycache__/main.cpython-312.pyc ADDED
Binary file (32 kB)
pipelines/__pycache__/schemas.cpython-310.pyc ADDED
Binary file (1.65 kB)
pipelines/__pycache__/schemas.cpython-311.pyc ADDED
Binary file (2.46 kB)
pipelines/bk/semantic_cache/Cache.py ADDED
@@ -0,0 +1,207 @@
+ import json
+ import time
+ import faiss
+
+ ############################## Sentence_Bert #########################################
+
+ import numpy as np
+ from sentence_transformers import SentenceTransformer  # pylint: disable=C0413
+
+ from abc import ABCMeta, abstractmethod
+
+
+ class BaseEmbedding(metaclass=ABCMeta):
+     """
+     Base Embedding interface.
+     """
+
+     @abstractmethod
+     def to_embeddings(self, data, **kwargs):
+         pass
+
+     @property
+     @abstractmethod
+     def dimension(self) -> int:
+         return 0
+
+
+ class SBERT(BaseEmbedding):
+     """Generate sentence embeddings for given text using pretrained Sentence Transformers models.
+
+     :param model: model name, defaults to 'all-MiniLM-L6-v2'.
+     :type model: str
+
+     Example:
+         .. code-block:: python
+
+             from semantic_cache.Cache import SBERT
+
+             test_sentence = 'Hello, world.'
+             encoder = SBERT('all-MiniLM-L6-v2')
+             embed = encoder.to_embeddings(test_sentence)
+     """
+
+     def __init__(self, model: str = "all-MiniLM-L6-v2"):
+         self.model = SentenceTransformer(model)
+         self.model.eval()
+         self.__dimension = None
+
+     def to_embeddings(self, data, **_):
+         """Generate an embedding for the given text input.
+
+         :param data: text as a string.
+         :return: a text embedding of shape (dim,).
+         """
+         if not isinstance(data, list):
+             data = [data]
+         emb = self.model.encode(data)
+         _, dim = emb.shape
+         if not self.__dimension:
+             self.__dimension = dim
+         return np.array(emb).astype("float32")
+
+     @property
+     def dimension(self):
+         """Embedding dimension.
+
+         :return: embedding dimension
+         """
+         if not self.__dimension:
+             embd = self.model.encode(["foo"])
+             _, self.__dimension = embd.shape
+         return self.__dimension
+
+
+ #################################### Adapter ########################################
+ def init_cache(embedding_model: str = "all-MiniLM-L6-v2"):
+     """Initializes the cache with a Faiss index and an SBERT model.
+
+     Args:
+         embedding_model (str): The name of the SBERT model to use.
+
+     Returns:
+         tuple: (index, encoder) where
+             - index is a Faiss index for storing embeddings.
+             - encoder is an SBERT model instance.
+     """
+     encoder = SBERT(embedding_model)
+     dimension = encoder.dimension
+     print(dimension)
+     index = faiss.IndexFlatL2(dimension)
+     if index.is_trained:
+         print('Index initialized and ready for use')
+
+     return index, encoder
+
+
+ def retrieve_cache(json_file):
+     try:
+         with open(json_file, 'r') as file:
+             cache = json.load(file)
+     except FileNotFoundError:
+         cache = {'questions': [], 'answers': []}
+
+     return cache
+
+
+ def store_cache(json_file, cache):
+     with open(json_file, 'w', encoding='utf-8') as file:
+         json.dump(cache, file)
+
+ #####################################################################
+ class Cache:
+     def __init__(self, embedding="all-MiniLM-L6-v2", json_file="cache_file.json", threshold=0.5, max_response=100, eviction_policy='FIFO'):
+         """Initializes the semantic cache.
+
+         Args:
+             embedding (str): The name of the SBERT embedding model.
+             json_file (str): The name of the JSON file where the cache is stored.
+             threshold (float): The threshold for the Euclidean distance to determine if a question is similar.
+             max_response (int): The maximum number of responses the cache can store.
+             eviction_policy (str): The policy for evicting items from the cache.
+                 This can be any policy, but only 'FIFO' (First In First Out) has been implemented so far.
+                 If None, no eviction policy will be applied.
+         """
+
+         # Initialize the Faiss index with Euclidean distance.
+         self.index, self.encoder = init_cache(embedding)
+
+         # Set the Euclidean distance threshold:
+         # a distance of 0 means identical sentences;
+         # we only return cached answers for sentences under this threshold.
+         self.euclidean_threshold = threshold
+         self.is_missed = True
+         self.json_file = json_file
+         self.cache = retrieve_cache(self.json_file)
+         self.max_response = max_response
+         self.eviction_policy = eviction_policy
+
+     def evict(self):
+         """Evicts items from the cache based on the eviction policy."""
+         if self.eviction_policy and len(self.cache["questions"]) > self.max_response:
+             for _ in range(len(self.cache["questions"]) - self.max_response):
+                 if self.eviction_policy == 'FIFO':
+                     self.cache["questions"].pop(0)
+                     self.cache["answers"].pop(0)
+
+     def cached_hit(self, question: str):
+         """Handles the cache-hit logic by retrieving the answer from the cache.
+
+         Args:
+             question (str): The input question.
+
+         Returns:
+             The cached answer (str) on a hit; otherwise the query embedding
+             and the miss flag, for use by `cache_miss`.
+         """
+         # Search for the nearest neighbor in the index.
+         embedding = self.encoder.to_embeddings([question])
+         self.index.nprobe = 8
+         D, I = self.index.search(embedding, 1)
+         print(D)
+         if D[0] >= 0:
+             if I[0][0] >= 0 and D[0][0] / 100 <= self.euclidean_threshold:
+                 row_id = int(I[0][0])
+                 print('Answer recovered from Cache.')
+                 print(f'Distance: {D[0][0]:.3f} (Threshold: {self.euclidean_threshold})')
+                 print(f'Found in cache at row: {row_id} with score: {D[0][0]:.3f}')
+                 self.is_missed = False
+                 return self.cache['answers'][row_id]
+         self.is_missed = True
+         return embedding, self.is_missed
+
+     def cache_miss(self, question: str, embedding, answer) -> str:
+         """Handles the cache-miss logic by updating the cache with the model's answer.
+
+         Args:
+             question (str): The input question.
+             embedding: The embedding of the question, taken from `cached_hit` when nothing was hit.
+             answer (str): The answer from the LLM.
+
+         Returns:
+             str: The answer, after appending it to the cache.
+         """
+         # Update the cache with the new question and answer.
+         self.cache['questions'].append(question)
+         self.cache['answers'].append(answer)
+
+         print('Answer not found in cache, appending new answer.')
+         print(f'Response: {answer}')
+
+         # Add the new embedding to the index.
+         self.index.add(embedding)
+
+         # Evict items if necessary.
+         self.evict()
+
+         # Save the updated cache to the JSON file.
+         store_cache(self.json_file, self.cache)
+         self.is_missed = False
+         return answer
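
A minimal sketch of the intended hit/miss flow around `Cache` (the `ask_llm` function is a placeholder for the real model call):

```python
# Illustrative only: consult the semantic cache before calling the LLM.
from semantic_cache import Cache

def ask_llm(question: str) -> str:
    return "Hà Nội."  # placeholder for the real model call

question = "Thủ đô của Việt Nam là gì?"
cache = Cache(json_file="cache_file.json", threshold=0.5)

result = cache.cached_hit(question)
if cache.is_missed:
    embedding, _ = result  # on a miss, cached_hit returns (embedding, True)
    answer = cache.cache_miss(question, embedding, ask_llm(question))
else:
    answer = result  # on a hit, cached_hit returns the cached answer
print(answer)
```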
pipelines/bk/semantic_cache/__init__.py ADDED
@@ -0,0 +1 @@
+ from semantic_cache.Cache import Cache
pipelines/bk/semantic_cache/__pycache__/Cache.cpython-311.pyc ADDED
Binary file (10.1 kB)
pipelines/bk/semantic_cache/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (244 Bytes)
pipelines/bk/semantic_cache/__pycache__/adapter.cpython-311.pyc ADDED
Binary file (2.15 kB)
pipelines/bk/semantic_cache/__pycache__/sbert.cpython-311.pyc ADDED
Binary file (3.53 kB)
pipelines/bk/semantic_cache/adapter.py ADDED
@@ -0,0 +1,40 @@
+ import json
+ from sbert import SBERT as sentence_bert
+ import faiss
+
+ def init_cache(embedding_model: str = "dangvantuan/vietnamese-embedding"):
+     """Initializes the cache with a Faiss index and an SBERT model.
+
+     Args:
+         embedding_model (str): The name of the SBERT model to use.
+
+     Returns:
+         tuple: (index, encoder) where
+             - index is a Faiss index for storing embeddings.
+             - encoder is an SBERT model instance.
+     """
+     encoder = sentence_bert(embedding_model)
+     dimension = encoder.dimension
+     print(dimension)
+     index = faiss.IndexFlatL2(dimension)
+     if index.is_trained:
+         print('Index initialized and ready for use')
+
+     return index, encoder
+
+
+ def retrieve_cache(json_file):
+     try:
+         with open(json_file, 'r') as file:
+             cache = json.load(file)
+     except FileNotFoundError:
+         cache = {'questions': [], 'answers': []}
+
+     return cache
+
+
+ def store_cache(json_file, cache):
+     with open(json_file, 'w', encoding='utf-8') as file:
+         json.dump(cache, file)
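
A short sketch of the adapter round trip (the cache file name is illustrative; the Vietnamese embedding model is downloaded on first use):

```python
# Illustrative only: initialize the index, update the JSON cache, and persist it.
index, encoder = init_cache()  # Faiss index + Vietnamese SBERT encoder
cache = retrieve_cache("cache_file.json")  # {'questions': [], 'answers': []} if the file is missing

cache["questions"].append("Xin chào?")
cache["answers"].append("Chào bạn!")
index.add(encoder.to_embeddings(["Xin chào?"]))  # keep the index in step with the cache

store_cache("cache_file.json", cache)
```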
pipelines/bk/semantic_cache/sbert.py ADDED
@@ -0,0 +1,70 @@
+ import numpy as np
+ from sentence_transformers import SentenceTransformer  # pylint: disable=C0413
+
+ from abc import ABCMeta, abstractmethod
+
+
+ class BaseEmbedding(metaclass=ABCMeta):
+     """
+     Base Embedding interface.
+     """
+
+     @abstractmethod
+     def to_embeddings(self, data, **kwargs):
+         pass
+
+     @property
+     @abstractmethod
+     def dimension(self) -> int:
+         return 0
+
+
+ class SBERT(BaseEmbedding):
+     """Generate sentence embeddings for given text using pretrained Sentence Transformers models.
+
+     :param model: model name, defaults to 'all-MiniLM-L6-v2'.
+     :type model: str
+
+     Example:
+         .. code-block:: python
+
+             from sbert import SBERT
+
+             test_sentence = 'Hello, world.'
+             encoder = SBERT('all-MiniLM-L6-v2')
+             embed = encoder.to_embeddings(test_sentence)
+     """
+
+     def __init__(self, model: str = "all-MiniLM-L6-v2"):
+         self.model = SentenceTransformer(model)
+         self.model.eval()
+         self.__dimension = None
+
+     def to_embeddings(self, data, **_):
+         """Generate an embedding for the given text input.
+
+         :param data: text as a string.
+         :return: a text embedding of shape (dim,).
+         """
+         if not isinstance(data, list):
+             data = [data]
+         emb = self.model.encode(data)
+         _, dim = emb.shape
+         if not self.__dimension:
+             self.__dimension = dim
+         return np.array(emb).astype("float32")
+
+     @property
+     def dimension(self):
+         """Embedding dimension.
+
+         :return: embedding dimension
+         """
+         if not self.__dimension:
+             embd = self.model.encode(["foo"])
+             _, self.__dimension = embd.shape
+         return self.__dimension
pipelines/blueprints/__pycache__/function_calling_blueprint.cpython-311.pyc ADDED
Binary file (6.68 kB)
pipelines/blueprints/__pycache__/prompts.cpython-310.pyc ADDED
Binary file (7.29 kB)
pipelines/blueprints/__pycache__/prompts.cpython-311.pyc ADDED
Binary file (7.35 kB)
pipelines/blueprints/__pycache__/prompts.cpython-312.pyc ADDED
Binary file (7.33 kB)