my
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitignore +0 -166
- .gitmodules +0 -3
- docs/en/datasets/classify/caltech101.md +81 -0
- docs/en/datasets/classify/caltech256.md +78 -0
- docs/en/datasets/classify/cifar10.md +80 -0
- docs/en/datasets/classify/cifar100.md +80 -0
- docs/en/datasets/classify/fashion-mnist.md +79 -0
- docs/en/datasets/classify/imagenet.md +83 -0
- docs/en/datasets/classify/imagenet10.md +77 -0
- docs/en/datasets/classify/imagenette.md +113 -0
- docs/en/datasets/classify/imagewoof.md +89 -0
- docs/en/datasets/classify/index.md +120 -0
- docs/en/datasets/classify/mnist.md +86 -0
- docs/en/datasets/detect/african-wildlife.md +92 -0
- docs/en/datasets/detect/argoverse.md +97 -0
- docs/en/datasets/detect/brain-tumor.md +91 -0
- docs/en/datasets/detect/coco.md +105 -0
- docs/en/datasets/detect/coco8.md +90 -0
- docs/en/datasets/detect/globalwheat2020.md +91 -0
- docs/en/datasets/detect/index.md +110 -0
- docs/en/datasets/detect/objects365.md +92 -0
- docs/en/datasets/detect/open-images-v7.md +120 -0
- docs/en/datasets/detect/roboflow-100.md +80 -0
- docs/en/datasets/detect/sku-110k.md +93 -0
- docs/en/datasets/detect/visdrone.md +92 -0
- docs/en/datasets/detect/voc.md +94 -0
- docs/en/datasets/detect/xview.md +97 -0
- docs/en/datasets/explorer/api.md +337 -0
- docs/en/datasets/explorer/dashboard.md +73 -0
- docs/en/datasets/explorer/explorer.ipynb +601 -0
- docs/en/datasets/explorer/index.md +60 -0
- docs/en/datasets/index.md +151 -0
- docs/en/datasets/obb/dota-v2.md +156 -0
- docs/en/datasets/obb/dota8.md +81 -0
- docs/en/datasets/obb/index.md +86 -0
- docs/en/datasets/pose/coco.md +95 -0
- docs/en/datasets/pose/coco8-pose.md +79 -0
- docs/en/datasets/pose/index.md +138 -0
- docs/en/datasets/pose/tiger-pose.md +98 -0
- docs/en/datasets/segment/carparts-seg.md +91 -0
- docs/en/datasets/segment/coco.md +94 -0
- docs/en/datasets/segment/coco8-seg.md +79 -0
- docs/en/datasets/segment/crack-seg.md +93 -0
- docs/en/datasets/segment/index.md +157 -0
- docs/en/datasets/segment/package-seg.md +92 -0
- docs/en/datasets/track/index.md +29 -0
- docs/mkdocs_github_authors.yaml +23 -0
- tests/conftest.py +71 -0
- tests/test_cli.py +138 -0
- tests/test_cuda.py +106 -0
.gitignore
DELETED
@@ -1,166 +0,0 @@
|
|
1 |
-
# Byte-compiled / optimized / DLL files
|
2 |
-
__pycache__/
|
3 |
-
*.py[cod]
|
4 |
-
*$py.class
|
5 |
-
|
6 |
-
# C extensions
|
7 |
-
*.so
|
8 |
-
|
9 |
-
# Distribution / packaging
|
10 |
-
.Python
|
11 |
-
build/
|
12 |
-
develop-eggs/
|
13 |
-
dist/
|
14 |
-
downloads/
|
15 |
-
eggs/
|
16 |
-
.eggs/
|
17 |
-
lib/
|
18 |
-
lib64/
|
19 |
-
parts/
|
20 |
-
sdist/
|
21 |
-
var/
|
22 |
-
wheels/
|
23 |
-
pip-wheel-metadata/
|
24 |
-
share/python-wheels/
|
25 |
-
*.egg-info/
|
26 |
-
.installed.cfg
|
27 |
-
*.egg
|
28 |
-
MANIFEST
|
29 |
-
|
30 |
-
# PyInstaller
|
31 |
-
# Usually these files are written by a python script from a template
|
32 |
-
# before PyInstaller builds the exe, so as to inject date/other info into it.
|
33 |
-
*.manifest
|
34 |
-
*.spec
|
35 |
-
|
36 |
-
# Installer logs
|
37 |
-
pip-log.txt
|
38 |
-
pip-delete-this-directory.txt
|
39 |
-
|
40 |
-
# Unit test / coverage reports
|
41 |
-
htmlcov/
|
42 |
-
.tox/
|
43 |
-
.nox/
|
44 |
-
.coverage
|
45 |
-
.coverage.*
|
46 |
-
.cache
|
47 |
-
nosetests.xml
|
48 |
-
coverage.xml
|
49 |
-
*.cover
|
50 |
-
*.py,cover
|
51 |
-
.hypothesis/
|
52 |
-
.pytest_cache/
|
53 |
-
mlruns/
|
54 |
-
|
55 |
-
# Translations
|
56 |
-
*.mo
|
57 |
-
*.pot
|
58 |
-
|
59 |
-
# Django stuff:
|
60 |
-
*.log
|
61 |
-
local_settings.py
|
62 |
-
db.sqlite3
|
63 |
-
db.sqlite3-journal
|
64 |
-
|
65 |
-
# Flask stuff:
|
66 |
-
instance/
|
67 |
-
.webassets-cache
|
68 |
-
|
69 |
-
# Scrapy stuff:
|
70 |
-
.scrapy
|
71 |
-
|
72 |
-
# Sphinx documentation
|
73 |
-
docs/_build/
|
74 |
-
|
75 |
-
# PyBuilder
|
76 |
-
target/
|
77 |
-
|
78 |
-
# Jupyter Notebook
|
79 |
-
.ipynb_checkpoints
|
80 |
-
|
81 |
-
# IPython
|
82 |
-
profile_default/
|
83 |
-
ipython_config.py
|
84 |
-
|
85 |
-
# Profiling
|
86 |
-
*.pclprof
|
87 |
-
|
88 |
-
# pyenv
|
89 |
-
.python-version
|
90 |
-
|
91 |
-
# pipenv
|
92 |
-
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
93 |
-
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
94 |
-
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
95 |
-
# install all needed dependencies.
|
96 |
-
#Pipfile.lock
|
97 |
-
|
98 |
-
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
99 |
-
__pypackages__/
|
100 |
-
|
101 |
-
# Celery stuff
|
102 |
-
celerybeat-schedule
|
103 |
-
celerybeat.pid
|
104 |
-
|
105 |
-
# SageMath parsed files
|
106 |
-
*.sage.py
|
107 |
-
|
108 |
-
# Environments
|
109 |
-
.env
|
110 |
-
.venv
|
111 |
-
.idea
|
112 |
-
env/
|
113 |
-
venv/
|
114 |
-
ENV/
|
115 |
-
env.bak/
|
116 |
-
venv.bak/
|
117 |
-
|
118 |
-
# Spyder project settings
|
119 |
-
.spyderproject
|
120 |
-
.spyproject
|
121 |
-
|
122 |
-
# VSCode project settings
|
123 |
-
.vscode/
|
124 |
-
|
125 |
-
# Rope project settings
|
126 |
-
.ropeproject
|
127 |
-
|
128 |
-
# mkdocs documentation
|
129 |
-
/site
|
130 |
-
mkdocs_github_authors.yaml
|
131 |
-
|
132 |
-
# mypy
|
133 |
-
.mypy_cache/
|
134 |
-
.dmypy.json
|
135 |
-
dmypy.json
|
136 |
-
|
137 |
-
# Pyre type checker
|
138 |
-
.pyre/
|
139 |
-
|
140 |
-
# datasets and projects
|
141 |
-
datasets/
|
142 |
-
runs/
|
143 |
-
wandb/
|
144 |
-
tests/
|
145 |
-
.DS_Store
|
146 |
-
|
147 |
-
# Neural Network weights -----------------------------------------------------------------------------------------------
|
148 |
-
weights/
|
149 |
-
*.weights
|
150 |
-
*.pt
|
151 |
-
*.pb
|
152 |
-
*.onnx
|
153 |
-
*.engine
|
154 |
-
*.mlmodel
|
155 |
-
*.mlpackage
|
156 |
-
*.torchscript
|
157 |
-
*.tflite
|
158 |
-
*.h5
|
159 |
-
*_saved_model/
|
160 |
-
*_web_model/
|
161 |
-
*_openvino_model/
|
162 |
-
*_paddle_model/
|
163 |
-
pnnx*
|
164 |
-
|
165 |
-
# Autogenerated files for tests
|
166 |
-
/ultralytics/assets/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitmodules
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
[submodule "EasyOCR-Trainer"]
|
2 |
-
path = EasyOCR-Trainer
|
3 |
-
url = https://github.com/Alimustoofaa/EasyOCR-Trainer
|
|
|
|
|
|
|
|
docs/en/datasets/classify/caltech101.md
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Learn about the Caltech-101 dataset, its structure and uses in machine learning. Includes instructions to train a YOLO model using this dataset.
|
4 |
+
keywords: Caltech-101, dataset, YOLO training, machine learning, object recognition, ultralytics
|
5 |
+
---
|
6 |
+
|
7 |
+
# Caltech-101 Dataset
|
8 |
+
|
9 |
+
The [Caltech-101](https://data.caltech.edu/records/mzrjq-6wc02) dataset is a widely used dataset for object recognition tasks, containing around 9,000 images from 101 object categories. The categories were chosen to reflect a variety of real-world objects, and the images themselves were carefully selected and annotated to provide a challenging benchmark for object recognition algorithms.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- The Caltech-101 dataset comprises around 9,000 color images divided into 101 categories.
|
14 |
+
- The categories encompass a wide variety of objects, including animals, vehicles, household items, and people.
|
15 |
+
- The number of images per category varies, with about 40 to 800 images in each category.
|
16 |
+
- Images are of variable sizes, with most images being medium resolution.
|
17 |
+
- Caltech-101 is widely used for training and testing in the field of machine learning, particularly for object recognition tasks.
|
18 |
+
|
19 |
+
## Dataset Structure
|
20 |
+
|
21 |
+
Unlike many other datasets, the Caltech-101 dataset is not formally split into training and testing sets. Users typically create their own splits based on their specific needs. However, a common practice is to use a random subset of images for training (e.g., 30 images per category) and the remaining images for testing.
|
22 |
+
|
23 |
+
## Applications
|
24 |
+
|
25 |
+
The Caltech-101 dataset is extensively used for training and evaluating machine learning models for object recognition tasks, including Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other algorithms. Its wide variety of categories and high-quality images make it an excellent dataset for research and development in the field of machine learning and computer vision.
|
26 |
+
|
27 |
+
## Usage
|
28 |
+
|
29 |
+
To train a YOLO model on the Caltech-101 dataset for 100 epochs, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
30 |
+
|
31 |
+
!!! Example "Train Example"
|
32 |
+
|
33 |
+
=== "Python"
|
34 |
+
|
35 |
+
```python
|
36 |
+
from ultralytics import YOLO
|
37 |
+
|
38 |
+
# Load a model
|
39 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
40 |
+
|
41 |
+
# Train the model
|
42 |
+
results = model.train(data='caltech101', epochs=100, imgsz=416)
|
43 |
+
```
|
44 |
+
|
45 |
+
=== "CLI"
|
46 |
+
|
47 |
+
```bash
|
48 |
+
# Start training from a pretrained *.pt model
|
49 |
+
yolo classify train data=caltech101 model=yolov8n-cls.pt epochs=100 imgsz=416
|
50 |
+
```
|
51 |
+
|
52 |
+
## Sample Images and Annotations
|
53 |
+
|
54 |
+
The Caltech-101 dataset contains high-quality color images of various objects, providing a well-structured dataset for object recognition tasks. Here are some examples of images from the dataset:
|
55 |
+
|
56 |
+
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239366386-44171121-b745-4206-9b59-a3be41e16089.png)
|
57 |
+
|
58 |
+
The example showcases the variety and complexity of the objects in the Caltech-101 dataset, emphasizing the significance of a diverse dataset for training robust object recognition models.
|
59 |
+
|
60 |
+
## Citations and Acknowledgments
|
61 |
+
|
62 |
+
If you use the Caltech-101 dataset in your research or development work, please cite the following paper:
|
63 |
+
|
64 |
+
!!! Quote ""
|
65 |
+
|
66 |
+
=== "BibTeX"
|
67 |
+
|
68 |
+
```bibtex
|
69 |
+
@article{fei2007learning,
|
70 |
+
title={Learning generative visual models from few training examples: An incremental Bayesian approach tested on 101 object categories},
|
71 |
+
author={Fei-Fei, Li and Fergus, Rob and Perona, Pietro},
|
72 |
+
journal={Computer Vision and Image Understanding},
|
73 |
+
volume={106},
|
74 |
+
number={1},
|
75 |
+
pages={59--70},
|
76 |
+
year={2007},
|
77 |
+
publisher={Elsevier}
|
78 |
+
}
|
79 |
+
```
|
80 |
+
|
81 |
+
We would like to acknowledge Li Fei-Fei, Rob Fergus, and Pietro Perona for creating and maintaining the Caltech-101 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the Caltech-101 dataset and its creators, visit the [Caltech-101 dataset website](https://data.caltech.edu/records/mzrjq-6wc02).
|
docs/en/datasets/classify/caltech256.md
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore the Caltech-256 dataset, a diverse collection of images used for object recognition tasks in machine learning. Learn to train a YOLO model on the dataset.
|
4 |
+
keywords: Ultralytics, YOLO, Caltech-256, dataset, object recognition, machine learning, computer vision, deep learning
|
5 |
+
---
|
6 |
+
|
7 |
+
# Caltech-256 Dataset
|
8 |
+
|
9 |
+
The [Caltech-256](https://data.caltech.edu/records/nyy15-4j048) dataset is an extensive collection of images used for object classification tasks. It contains around 30,000 images divided into 257 categories (256 object categories and 1 background category). The images are carefully curated and annotated to provide a challenging and diverse benchmark for object recognition algorithms.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- The Caltech-256 dataset comprises around 30,000 color images divided into 257 categories.
|
14 |
+
- Each category contains a minimum of 80 images.
|
15 |
+
- The categories encompass a wide variety of real-world objects, including animals, vehicles, household items, and people.
|
16 |
+
- Images are of variable sizes and resolutions.
|
17 |
+
- Caltech-256 is widely used for training and testing in the field of machine learning, particularly for object recognition tasks.
|
18 |
+
|
19 |
+
## Dataset Structure
|
20 |
+
|
21 |
+
Like Caltech-101, the Caltech-256 dataset does not have a formal split between training and testing sets. Users typically create their own splits according to their specific needs. A common practice is to use a random subset of images for training and the remaining images for testing.
|
22 |
+
|
23 |
+
## Applications
|
24 |
+
|
25 |
+
The Caltech-256 dataset is extensively used for training and evaluating machine learning models for object recognition tasks, including Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other algorithms. Its diverse set of categories and high-quality images make it an invaluable dataset for research and development in the field of machine learning and computer vision.
|
26 |
+
|
27 |
+
## Usage
|
28 |
+
|
29 |
+
To train a YOLO model on the Caltech-256 dataset for 100 epochs, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
30 |
+
|
31 |
+
!!! Example "Train Example"
|
32 |
+
|
33 |
+
=== "Python"
|
34 |
+
|
35 |
+
```python
|
36 |
+
from ultralytics import YOLO
|
37 |
+
|
38 |
+
# Load a model
|
39 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
40 |
+
|
41 |
+
# Train the model
|
42 |
+
results = model.train(data='caltech256', epochs=100, imgsz=416)
|
43 |
+
```
|
44 |
+
|
45 |
+
=== "CLI"
|
46 |
+
|
47 |
+
```bash
|
48 |
+
# Start training from a pretrained *.pt model
|
49 |
+
yolo classify train data=caltech256 model=yolov8n-cls.pt epochs=100 imgsz=416
|
50 |
+
```
|
51 |
+
|
52 |
+
## Sample Images and Annotations
|
53 |
+
|
54 |
+
The Caltech-256 dataset contains high-quality color images of various objects, providing a comprehensive dataset for object recognition tasks. Here are some examples of images from the dataset ([credit](https://ml4a.github.io/demos/tsne_viewer.html)):
|
55 |
+
|
56 |
+
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239365061-1e5f7857-b1e8-44ca-b3d7-d0befbcd33f9.jpg)
|
57 |
+
|
58 |
+
The example showcases the diversity and complexity of the objects in the Caltech-256 dataset, emphasizing the importance of a varied dataset for training robust object recognition models.
|
59 |
+
|
60 |
+
## Citations and Acknowledgments
|
61 |
+
|
62 |
+
If you use the Caltech-256 dataset in your research or development work, please cite the following paper:
|
63 |
+
|
64 |
+
!!! Quote ""
|
65 |
+
|
66 |
+
=== "BibTeX"
|
67 |
+
|
68 |
+
```bibtex
|
69 |
+
@article{griffin2007caltech,
|
70 |
+
title={Caltech-256 object category dataset},
|
71 |
+
author={Griffin, Gregory and Holub, Alex and Perona, Pietro},
|
72 |
+
year={2007}
|
73 |
+
}
|
74 |
+
```
|
75 |
+
|
76 |
+
We would like to acknowledge Gregory Griffin, Alex Holub, and Pietro Perona for creating and maintaining the Caltech-256 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the Caltech-256 dataset and its creators, visit the [Caltech-256 dataset website](https://data.caltech.edu/records/nyy15-4j048).
|
docs/en/datasets/classify/cifar10.md
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore the CIFAR-10 dataset, widely used for training in machine learning and computer vision, and learn how to use it with Ultralytics YOLO.
|
4 |
+
keywords: CIFAR-10, dataset, machine learning, image classification, computer vision, YOLO, Ultralytics, training, testing, deep learning, Convolutional Neural Networks, Support Vector Machines
|
5 |
+
---
|
6 |
+
|
7 |
+
# CIFAR-10 Dataset
|
8 |
+
|
9 |
+
The [CIFAR-10](https://www.cs.toronto.edu/~kriz/cifar.html) (Canadian Institute For Advanced Research) dataset is a collection of images used widely for machine learning and computer vision algorithms. It was developed by researchers at the CIFAR institute and consists of 60,000 32x32 color images in 10 different classes.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- The CIFAR-10 dataset consists of 60,000 images, divided into 10 classes.
|
14 |
+
- Each class contains 6,000 images, split into 5,000 for training and 1,000 for testing.
|
15 |
+
- The images are colored and of size 32x32 pixels.
|
16 |
+
- The 10 different classes represent airplanes, cars, birds, cats, deer, dogs, frogs, horses, ships, and trucks.
|
17 |
+
- CIFAR-10 is commonly used for training and testing in the field of machine learning and computer vision.
|
18 |
+
|
19 |
+
## Dataset Structure
|
20 |
+
|
21 |
+
The CIFAR-10 dataset is split into two subsets:
|
22 |
+
|
23 |
+
1. **Training Set**: This subset contains 50,000 images used for training machine learning models.
|
24 |
+
2. **Testing Set**: This subset consists of 10,000 images used for testing and benchmarking the trained models.
|
25 |
+
|
26 |
+
## Applications
|
27 |
+
|
28 |
+
The CIFAR-10 dataset is widely used for training and evaluating machine learning models for image classification tasks, including Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other algorithms. The diversity of the dataset in terms of classes and the presence of color images make it a well-rounded dataset for research and development in the field of machine learning and computer vision.
|
29 |
+
|
30 |
+
## Usage
|
31 |
+
|
32 |
+
To train a YOLO model on the CIFAR-10 dataset for 100 epochs with an image size of 32x32, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
33 |
+
|
34 |
+
!!! Example "Train Example"
|
35 |
+
|
36 |
+
=== "Python"
|
37 |
+
|
38 |
+
```python
|
39 |
+
from ultralytics import YOLO
|
40 |
+
|
41 |
+
# Load a model
|
42 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
43 |
+
|
44 |
+
# Train the model
|
45 |
+
results = model.train(data='cifar10', epochs=100, imgsz=32)
|
46 |
+
```
|
47 |
+
|
48 |
+
=== "CLI"
|
49 |
+
|
50 |
+
```bash
|
51 |
+
# Start training from a pretrained *.pt model
|
52 |
+
yolo classify train data=cifar10 model=yolov8n-cls.pt epochs=100 imgsz=32
|
53 |
+
```
|
54 |
+
|
55 |
+
## Sample Images and Annotations
|
56 |
+
|
57 |
+
The CIFAR-10 dataset contains color images of various objects, providing a well-structured dataset for image classification tasks. Here are some examples of images from the dataset:
|
58 |
+
|
59 |
+
![Dataset sample image](https://miro.medium.com/max/1100/1*SZnidBt7CQ4Xqcag6rd8Ew.png)
|
60 |
+
|
61 |
+
The example showcases the variety and complexity of the objects in the CIFAR-10 dataset, highlighting the importance of a diverse dataset for training robust image classification models.
|
62 |
+
|
63 |
+
## Citations and Acknowledgments
|
64 |
+
|
65 |
+
If you use the CIFAR-10 dataset in your research or development work, please cite the following paper:
|
66 |
+
|
67 |
+
!!! Quote ""
|
68 |
+
|
69 |
+
=== "BibTeX"
|
70 |
+
|
71 |
+
```bibtex
|
72 |
+
@TECHREPORT{Krizhevsky09learningmultiple,
|
73 |
+
author={Alex Krizhevsky},
|
74 |
+
title={Learning multiple layers of features from tiny images},
|
75 |
+
institution={University of Toronto},
|
76 |
+
year={2009}
|
77 |
+
}
|
78 |
+
```
|
79 |
+
|
80 |
+
We would like to acknowledge Alex Krizhevsky for creating and maintaining the CIFAR-10 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the CIFAR-10 dataset and its creator, visit the [CIFAR-10 dataset website](https://www.cs.toronto.edu/~kriz/cifar.html).
|
docs/en/datasets/classify/cifar100.md
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Discover how to leverage the CIFAR-100 dataset for machine learning and computer vision tasks with YOLO. Gain insights on its structure, use, and utilization for model training.
|
4 |
+
keywords: Ultralytics, YOLO, CIFAR-100 dataset, image classification, machine learning, computer vision, YOLO model training
|
5 |
+
---
|
6 |
+
|
7 |
+
# CIFAR-100 Dataset
|
8 |
+
|
9 |
+
The [CIFAR-100](https://www.cs.toronto.edu/~kriz/cifar.html) (Canadian Institute For Advanced Research) dataset is a significant extension of the CIFAR-10 dataset, composed of 60,000 32x32 color images in 100 different classes. It was developed by researchers at the CIFAR institute, offering a more challenging dataset for more complex machine learning and computer vision tasks.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- The CIFAR-100 dataset consists of 60,000 images, divided into 100 classes.
|
14 |
+
- Each class contains 600 images, split into 500 for training and 100 for testing.
|
15 |
+
- The images are colored and of size 32x32 pixels.
|
16 |
+
- The 100 different classes are grouped into 20 coarse categories for higher level classification.
|
17 |
+
- CIFAR-100 is commonly used for training and testing in the field of machine learning and computer vision.
|
18 |
+
|
19 |
+
## Dataset Structure
|
20 |
+
|
21 |
+
The CIFAR-100 dataset is split into two subsets:
|
22 |
+
|
23 |
+
1. **Training Set**: This subset contains 50,000 images used for training machine learning models.
|
24 |
+
2. **Testing Set**: This subset consists of 10,000 images used for testing and benchmarking the trained models.
|
25 |
+
|
26 |
+
## Applications
|
27 |
+
|
28 |
+
The CIFAR-100 dataset is extensively used for training and evaluating machine learning models for image classification tasks, including Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other algorithms. The diversity of the dataset in terms of classes and the presence of color images make it a more challenging and comprehensive dataset for research and development in the field of machine learning and computer vision.
|
29 |
+
|
30 |
+
## Usage
|
31 |
+
|
32 |
+
To train a YOLO model on the CIFAR-100 dataset for 100 epochs with an image size of 32x32, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
33 |
+
|
34 |
+
!!! Example "Train Example"
|
35 |
+
|
36 |
+
=== "Python"
|
37 |
+
|
38 |
+
```python
|
39 |
+
from ultralytics import YOLO
|
40 |
+
|
41 |
+
# Load a model
|
42 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
43 |
+
|
44 |
+
# Train the model
|
45 |
+
results = model.train(data='cifar100', epochs=100, imgsz=32)
|
46 |
+
```
|
47 |
+
|
48 |
+
=== "CLI"
|
49 |
+
|
50 |
+
```bash
|
51 |
+
# Start training from a pretrained *.pt model
|
52 |
+
yolo classify train data=cifar100 model=yolov8n-cls.pt epochs=100 imgsz=32
|
53 |
+
```
|
54 |
+
|
55 |
+
## Sample Images and Annotations
|
56 |
+
|
57 |
+
The CIFAR-100 dataset contains color images of various objects, providing a well-structured dataset for image classification tasks. Here are some examples of images from the dataset:
|
58 |
+
|
59 |
+
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239363319-62ebf02f-7469-4178-b066-ccac3cd334db.jpg)
|
60 |
+
|
61 |
+
The example showcases the variety and complexity of the objects in the CIFAR-100 dataset, highlighting the importance of a diverse dataset for training robust image classification models.
|
62 |
+
|
63 |
+
## Citations and Acknowledgments
|
64 |
+
|
65 |
+
If you use the CIFAR-100 dataset in your research or development work, please cite the following paper:
|
66 |
+
|
67 |
+
!!! Quote ""
|
68 |
+
|
69 |
+
=== "BibTeX"
|
70 |
+
|
71 |
+
```bibtex
|
72 |
+
@TECHREPORT{Krizhevsky09learningmultiple,
|
73 |
+
author={Alex Krizhevsky},
|
74 |
+
title={Learning multiple layers of features from tiny images},
|
75 |
+
institution={University of Toronto},
|
76 |
+
year={2009}
|
77 |
+
}
|
78 |
+
```
|
79 |
+
|
80 |
+
We would like to acknowledge Alex Krizhevsky for creating and maintaining the CIFAR-100 dataset as a valuable resource for the machine learning and computer vision research community. For more information about the CIFAR-100 dataset and its creator, visit the [CIFAR-100 dataset website](https://www.cs.toronto.edu/~kriz/cifar.html).
|
docs/en/datasets/classify/fashion-mnist.md
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Learn how to use the Fashion-MNIST dataset for image classification with the Ultralytics YOLO model. Covers dataset structure, labels, applications, and usage.
|
4 |
+
keywords: Ultralytics, YOLO, Fashion-MNIST, dataset, image classification, machine learning, deep learning, neural networks, training, testing
|
5 |
+
---
|
6 |
+
|
7 |
+
# Fashion-MNIST Dataset
|
8 |
+
|
9 |
+
The [Fashion-MNIST](https://github.com/zalandoresearch/fashion-mnist) dataset is a database of Zalando's article images—consisting of a training set of 60,000 examples and a test set of 10,000 examples. Each example is a 28x28 grayscale image, associated with a label from 10 classes. Fashion-MNIST is intended to serve as a direct drop-in replacement for the original MNIST dataset for benchmarking machine learning algorithms.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- Fashion-MNIST contains 60,000 training images and 10,000 testing images of Zalando's article images.
|
14 |
+
- The dataset comprises grayscale images of size 28x28 pixels.
|
15 |
+
- Each pixel has a single pixel-value associated with it, indicating the lightness or darkness of that pixel, with higher numbers meaning darker. This pixel-value is an integer between 0 and 255.
|
16 |
+
- Fashion-MNIST is widely used for training and testing in the field of machine learning, especially for image classification tasks.
|
17 |
+
|
18 |
+
## Dataset Structure
|
19 |
+
|
20 |
+
The Fashion-MNIST dataset is split into two subsets:
|
21 |
+
|
22 |
+
1. **Training Set**: This subset contains 60,000 images used for training machine learning models.
|
23 |
+
2. **Testing Set**: This subset consists of 10,000 images used for testing and benchmarking the trained models.
|
24 |
+
|
25 |
+
## Labels
|
26 |
+
|
27 |
+
Each training and test example is assigned to one of the following labels:
|
28 |
+
|
29 |
+
0. T-shirt/top
|
30 |
+
1. Trouser
|
31 |
+
2. Pullover
|
32 |
+
3. Dress
|
33 |
+
4. Coat
|
34 |
+
5. Sandal
|
35 |
+
6. Shirt
|
36 |
+
7. Sneaker
|
37 |
+
8. Bag
|
38 |
+
9. Ankle boot
|
39 |
+
|
40 |
+
## Applications
|
41 |
+
|
42 |
+
The Fashion-MNIST dataset is widely used for training and evaluating machine learning models for image classification tasks, including Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other algorithms. The dataset's simple and well-structured format makes it an essential resource for researchers and practitioners in the field of machine learning and computer vision.
|
43 |
+
|
44 |
+
## Usage
|
45 |
+
|
46 |
+
To train a YOLO model on the Fashion-MNIST dataset for 100 epochs with an image size of 28x28, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
47 |
+
|
48 |
+
!!! Example "Train Example"
|
49 |
+
|
50 |
+
=== "Python"
|
51 |
+
|
52 |
+
```python
|
53 |
+
from ultralytics import YOLO
|
54 |
+
|
55 |
+
# Load a model
|
56 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
57 |
+
|
58 |
+
# Train the model
|
59 |
+
results = model.train(data='fashion-mnist', epochs=100, imgsz=28)
|
60 |
+
```
|
61 |
+
|
62 |
+
=== "CLI"
|
63 |
+
|
64 |
+
```bash
|
65 |
+
# Start training from a pretrained *.pt model
|
66 |
+
yolo classify train data=fashion-mnist model=yolov8n-cls.pt epochs=100 imgsz=28
|
67 |
+
```
|
68 |
+
|
69 |
+
## Sample Images and Annotations
|
70 |
+
|
71 |
+
The Fashion-MNIST dataset contains grayscale images of Zalando's article images, providing a well-structured dataset for image classification tasks. Here are some examples of images from the dataset:
|
72 |
+
|
73 |
+
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239359139-ce0a434e-9056-43e0-a306-3214f193dcce.png)
|
74 |
+
|
75 |
+
The example showcases the variety and complexity of the images in the Fashion-MNIST dataset, highlighting the importance of a diverse dataset for training robust image classification models.
|
76 |
+
|
77 |
+
## Acknowledgments
|
78 |
+
|
79 |
+
If you use the Fashion-MNIST dataset in your research or development work, please acknowledge the dataset by linking to the [GitHub repository](https://github.com/zalandoresearch/fashion-mnist). This dataset was made available by Zalando Research.
|
docs/en/datasets/classify/imagenet.md
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Understand how to use ImageNet, an extensive annotated image dataset for object recognition research, with Ultralytics YOLO models. Learn about its structure, usage, and significance in computer vision.
|
4 |
+
keywords: Ultralytics, YOLO, ImageNet, dataset, object recognition, deep learning, computer vision, machine learning, dataset training, model training, image classification, object detection
|
5 |
+
---
|
6 |
+
|
7 |
+
# ImageNet Dataset
|
8 |
+
|
9 |
+
[ImageNet](https://www.image-net.org/) is a large-scale database of annotated images designed for use in visual object recognition research. It contains over 14 million images, with each image annotated using WordNet synsets, making it one of the most extensive resources available for training deep learning models in computer vision tasks.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- ImageNet contains over 14 million high-resolution images spanning thousands of object categories.
|
14 |
+
- The dataset is organized according to the WordNet hierarchy, with each synset representing a category.
|
15 |
+
- ImageNet is widely used for training and benchmarking in the field of computer vision, particularly for image classification and object detection tasks.
|
16 |
+
- The annual ImageNet Large Scale Visual Recognition Challenge (ILSVRC) has been instrumental in advancing computer vision research.
|
17 |
+
|
18 |
+
## Dataset Structure
|
19 |
+
|
20 |
+
The ImageNet dataset is organized using the WordNet hierarchy. Each node in the hierarchy represents a category, and each category is described by a synset (a collection of synonymous terms). The images in ImageNet are annotated with one or more synsets, providing a rich resource for training models to recognize various objects and their relationships.
|
21 |
+
|
22 |
+
## ImageNet Large Scale Visual Recognition Challenge (ILSVRC)
|
23 |
+
|
24 |
+
The annual [ImageNet Large Scale Visual Recognition Challenge (ILSVRC)](https://image-net.org/challenges/LSVRC/) has been an important event in the field of computer vision. It has provided a platform for researchers and developers to evaluate their algorithms and models on a large-scale dataset with standardized evaluation metrics. The ILSVRC has led to significant advancements in the development of deep learning models for image classification, object detection, and other computer vision tasks.
|
25 |
+
|
26 |
+
## Applications
|
27 |
+
|
28 |
+
The ImageNet dataset is widely used for training and evaluating deep learning models in various computer vision tasks, such as image classification, object detection, and object localization. Some popular deep learning architectures, such as AlexNet, VGG, and ResNet, were developed and benchmarked using the ImageNet dataset.
|
29 |
+
|
30 |
+
## Usage
|
31 |
+
|
32 |
+
To train a deep learning model on the ImageNet dataset for 100 epochs with an image size of 224x224, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
33 |
+
|
34 |
+
!!! Example "Train Example"
|
35 |
+
|
36 |
+
=== "Python"
|
37 |
+
|
38 |
+
```python
|
39 |
+
from ultralytics import YOLO
|
40 |
+
|
41 |
+
# Load a model
|
42 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
43 |
+
|
44 |
+
# Train the model
|
45 |
+
results = model.train(data='imagenet', epochs=100, imgsz=224)
|
46 |
+
```
|
47 |
+
|
48 |
+
=== "CLI"
|
49 |
+
|
50 |
+
```bash
|
51 |
+
# Start training from a pretrained *.pt model
|
52 |
+
yolo train data=imagenet model=yolov8n-cls.pt epochs=100 imgsz=224
|
53 |
+
```
|
54 |
+
|
55 |
+
## Sample Images and Annotations
|
56 |
+
|
57 |
+
The ImageNet dataset contains high-resolution images spanning thousands of object categories, providing a diverse and extensive dataset for training and evaluating computer vision models. Here are some examples of images from the dataset:
|
58 |
+
|
59 |
+
![Dataset sample images](https://user-images.githubusercontent.com/26833433/239280348-3d8f30c7-6f05-4dda-9cfe-d62ad9faecc9.png)
|
60 |
+
|
61 |
+
The example showcases the variety and complexity of the images in the ImageNet dataset, highlighting the importance of a diverse dataset for training robust computer vision models.
|
62 |
+
|
63 |
+
## Citations and Acknowledgments
|
64 |
+
|
65 |
+
If you use the ImageNet dataset in your research or development work, please cite the following paper:
|
66 |
+
|
67 |
+
!!! Quote ""
|
68 |
+
|
69 |
+
=== "BibTeX"
|
70 |
+
|
71 |
+
```bibtex
|
72 |
+
@article{ILSVRC15,
|
73 |
+
author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
|
74 |
+
title={ImageNet Large Scale Visual Recognition Challenge},
|
75 |
+
year={2015},
|
76 |
+
journal={International Journal of Computer Vision (IJCV)},
|
77 |
+
volume={115},
|
78 |
+
number={3},
|
79 |
+
pages={211-252}
|
80 |
+
}
|
81 |
+
```
|
82 |
+
|
83 |
+
We would like to acknowledge the ImageNet team, led by Olga Russakovsky, Jia Deng, and Li Fei-Fei, for creating and maintaining the ImageNet dataset as a valuable resource for the machine learning and computer vision research community. For more information about the ImageNet dataset and its creators, visit the [ImageNet website](https://www.image-net.org/).
|
docs/en/datasets/classify/imagenet10.md
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore the compact ImageNet10 Dataset developed by Ultralytics. Ideal for fast testing of computer vision training pipelines and CV model sanity checks.
|
4 |
+
keywords: Ultralytics, YOLO, ImageNet10 Dataset, Image detection, Deep Learning, ImageNet, AI model testing, Computer vision, Machine learning
|
5 |
+
---
|
6 |
+
|
7 |
+
# ImageNet10 Dataset
|
8 |
+
|
9 |
+
The [ImageNet10](https://github.com/ultralytics/yolov5/releases/download/v1.0/imagenet10.zip) dataset is a small-scale subset of the [ImageNet](https://www.image-net.org/) database, developed by [Ultralytics](https://ultralytics.com) and designed for CI tests, sanity checks, and fast testing of training pipelines. This dataset is composed of the first image in the training set and the first image from the validation set of the first 10 classes in ImageNet. Although significantly smaller, it retains the structure and diversity of the original ImageNet dataset.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- ImageNet10 is a compact version of ImageNet, with 20 images representing the first 10 classes of the original dataset.
|
14 |
+
- The dataset is organized according to the WordNet hierarchy, mirroring the structure of the full ImageNet dataset.
|
15 |
+
- It is ideally suited for CI tests, sanity checks, and rapid testing of training pipelines in computer vision tasks.
|
16 |
+
- Although not designed for model benchmarking, it can provide a quick indication of a model's basic functionality and correctness.
|
17 |
+
|
18 |
+
## Dataset Structure
|
19 |
+
|
20 |
+
The ImageNet10 dataset, like the original ImageNet, is organized using the WordNet hierarchy. Each of the 10 classes in ImageNet10 is described by a synset (a collection of synonymous terms). The images in ImageNet10 are annotated with one or more synsets, providing a compact resource for testing models to recognize various objects and their relationships.
|
21 |
+
|
22 |
+
## Applications
|
23 |
+
|
24 |
+
The ImageNet10 dataset is useful for quickly testing and debugging computer vision models and pipelines. Its small size allows for rapid iteration, making it ideal for continuous integration tests and sanity checks. It can also be used for fast preliminary testing of new models or changes to existing models before moving on to full-scale testing with the complete ImageNet dataset.
|
25 |
+
|
26 |
+
## Usage
|
27 |
+
|
28 |
+
To test a deep learning model on the ImageNet10 dataset with an image size of 224x224, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
29 |
+
|
30 |
+
!!! Example "Test Example"
|
31 |
+
|
32 |
+
=== "Python"
|
33 |
+
|
34 |
+
```python
|
35 |
+
from ultralytics import YOLO
|
36 |
+
|
37 |
+
# Load a model
|
38 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
39 |
+
|
40 |
+
# Train the model
|
41 |
+
results = model.train(data='imagenet10', epochs=5, imgsz=224)
|
42 |
+
```
|
43 |
+
|
44 |
+
=== "CLI"
|
45 |
+
|
46 |
+
```bash
|
47 |
+
# Start training from a pretrained *.pt model
|
48 |
+
yolo train data=imagenet10 model=yolov8n-cls.pt epochs=5 imgsz=224
|
49 |
+
```
|
50 |
+
|
51 |
+
## Sample Images and Annotations
|
52 |
+
|
53 |
+
The ImageNet10 dataset contains a subset of images from the original ImageNet dataset. These images are chosen to represent the first 10 classes in the dataset, providing a diverse yet compact dataset for quick testing and evaluation.
|
54 |
+
|
55 |
+
![Dataset sample images](https://user-images.githubusercontent.com/26833433/239689723-16f9b4a7-becc-4deb-b875-d3e5c28eb03b.png) The example showcases the variety and complexity of the images in the ImageNet10 dataset, highlighting its usefulness for sanity checks and quick testing of computer vision models.
|
56 |
+
|
57 |
+
## Citations and Acknowledgments
|
58 |
+
|
59 |
+
If you use the ImageNet10 dataset in your research or development work, please cite the original ImageNet paper:
|
60 |
+
|
61 |
+
!!! Quote ""
|
62 |
+
|
63 |
+
=== "BibTeX"
|
64 |
+
|
65 |
+
```bibtex
|
66 |
+
@article{ILSVRC15,
|
67 |
+
author = {Olga Russakovsky and Jia Deng and Hao Su and Jonathan Krause and Sanjeev Satheesh and Sean Ma and Zhiheng Huang and Andrej Karpathy and Aditya Khosla and Michael Bernstein and Alexander C. Berg and Li Fei-Fei},
|
68 |
+
title={ImageNet Large Scale Visual Recognition Challenge},
|
69 |
+
year={2015},
|
70 |
+
journal={International Journal of Computer Vision (IJCV)},
|
71 |
+
volume={115},
|
72 |
+
number={3},
|
73 |
+
pages={211-252}
|
74 |
+
}
|
75 |
+
```
|
76 |
+
|
77 |
+
We would like to acknowledge the ImageNet team, led by Olga Russakovsky, Jia Deng, and Li Fei-Fei, for creating and maintaining the ImageNet dataset. The ImageNet10 dataset, while a compact subset, is a valuable resource for quick testing and debugging in the machine learning and computer vision research community. For more information about the ImageNet dataset and its creators, visit the [ImageNet website](https://www.image-net.org/).
|
docs/en/datasets/classify/imagenette.md
ADDED
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Learn about the ImageNette dataset and its usage in deep learning model training. Find code snippets for model training and explore ImageNette datatypes.
|
4 |
+
keywords: ImageNette dataset, Ultralytics, YOLO, Image classification, Machine Learning, Deep learning, Training code snippets, CNN, ImageNette160, ImageNette320
|
5 |
+
---
|
6 |
+
|
7 |
+
# ImageNette Dataset
|
8 |
+
|
9 |
+
The [ImageNette](https://github.com/fastai/imagenette) dataset is a subset of the larger [ImageNet](https://www.image-net.org/) dataset, but it only includes 10 easily distinguishable classes. It was created to provide a quicker, easier-to-use version of ImageNet for software development and education.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- ImageNette contains images from 10 different classes such as tench, English springer, cassette player, chain saw, church, French horn, garbage truck, gas pump, golf ball, parachute.
|
14 |
+
- The dataset comprises colored images of varying dimensions.
|
15 |
+
- ImageNette is widely used for training and testing in the field of machine learning, especially for image classification tasks.
|
16 |
+
|
17 |
+
## Dataset Structure
|
18 |
+
|
19 |
+
The ImageNette dataset is split into two subsets:
|
20 |
+
|
21 |
+
1. **Training Set**: This subset contains several thousand images used for training machine learning models. The exact number varies per class.
|
22 |
+
2. **Validation Set**: This subset consists of several hundred images used for validating and benchmarking the trained models. Again, the exact number varies per class.
|
23 |
+
|
24 |
+
## Applications
|
25 |
+
|
26 |
+
The ImageNette dataset is widely used for training and evaluating deep learning models in image classification tasks, such as Convolutional Neural Networks (CNNs), and various other machine learning algorithms. The dataset's straightforward format and well-chosen classes make it a handy resource for both beginner and experienced practitioners in the field of machine learning and computer vision.
|
27 |
+
|
28 |
+
## Usage
|
29 |
+
|
30 |
+
To train a model on the ImageNette dataset for 100 epochs with a standard image size of 224x224, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
31 |
+
|
32 |
+
!!! Example "Train Example"
|
33 |
+
|
34 |
+
=== "Python"
|
35 |
+
|
36 |
+
```python
|
37 |
+
from ultralytics import YOLO
|
38 |
+
|
39 |
+
# Load a model
|
40 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
41 |
+
|
42 |
+
# Train the model
|
43 |
+
results = model.train(data='imagenette', epochs=100, imgsz=224)
|
44 |
+
```
|
45 |
+
|
46 |
+
=== "CLI"
|
47 |
+
|
48 |
+
```bash
|
49 |
+
# Start training from a pretrained *.pt model
|
50 |
+
yolo classify train data=imagenette model=yolov8n-cls.pt epochs=100 imgsz=224
|
51 |
+
```
|
52 |
+
|
53 |
+
## Sample Images and Annotations
|
54 |
+
|
55 |
+
The ImageNette dataset contains colored images of various objects and scenes, providing a diverse dataset for image classification tasks. Here are some examples of images from the dataset:
|
56 |
+
|
57 |
+
![Dataset sample image](https://docs.fast.ai/22_tutorial.imagenette_files/figure-html/cell-21-output-1.png)
|
58 |
+
|
59 |
+
The example showcases the variety and complexity of the images in the ImageNette dataset, highlighting the importance of a diverse dataset for training robust image classification models.
|
60 |
+
|
61 |
+
## ImageNette160 and ImageNette320
|
62 |
+
|
63 |
+
For faster prototyping and training, the ImageNette dataset is also available in two reduced sizes: ImageNette160 and ImageNette320. These datasets maintain the same classes and structure as the full ImageNette dataset, but the images are resized to a smaller dimension. As such, these versions of the dataset are particularly useful for preliminary model testing, or when computational resources are limited.
|
64 |
+
|
65 |
+
To use these datasets, simply replace 'imagenette' with 'imagenette160' or 'imagenette320' in the training command. The following code snippets illustrate this:
|
66 |
+
|
67 |
+
!!! Example "Train Example with ImageNette160"
|
68 |
+
|
69 |
+
=== "Python"
|
70 |
+
|
71 |
+
```python
|
72 |
+
from ultralytics import YOLO
|
73 |
+
|
74 |
+
# Load a model
|
75 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
76 |
+
|
77 |
+
# Train the model with ImageNette160
|
78 |
+
results = model.train(data='imagenette160', epochs=100, imgsz=160)
|
79 |
+
```
|
80 |
+
|
81 |
+
=== "CLI"
|
82 |
+
|
83 |
+
```bash
|
84 |
+
# Start training from a pretrained *.pt model with ImageNette160
|
85 |
+
yolo classify train data=imagenette160 model=yolov8n-cls.pt epochs=100 imgsz=160
|
86 |
+
```
|
87 |
+
|
88 |
+
!!! Example "Train Example with ImageNette320"
|
89 |
+
|
90 |
+
=== "Python"
|
91 |
+
|
92 |
+
```python
|
93 |
+
from ultralytics import YOLO
|
94 |
+
|
95 |
+
# Load a model
|
96 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
97 |
+
|
98 |
+
# Train the model with ImageNette320
|
99 |
+
results = model.train(data='imagenette320', epochs=100, imgsz=320)
|
100 |
+
```
|
101 |
+
|
102 |
+
=== "CLI"
|
103 |
+
|
104 |
+
```bash
|
105 |
+
# Start training from a pretrained *.pt model with ImageNette320
|
106 |
+
yolo classify train data=imagenette320 model=yolov8n-cls.pt epochs=100 imgsz=320
|
107 |
+
```
|
108 |
+
|
109 |
+
These smaller versions of the dataset allow for rapid iterations during the development process while still providing valuable and realistic image classification tasks.
|
110 |
+
|
111 |
+
## Citations and Acknowledgments
|
112 |
+
|
113 |
+
If you use the ImageNette dataset in your research or development work, please acknowledge it appropriately. For more information about the ImageNette dataset, visit the [ImageNette dataset GitHub page](https://github.com/fastai/imagenette).
|
docs/en/datasets/classify/imagewoof.md
ADDED
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore the ImageWoof dataset, designed for challenging dog breed classification. Train AI models with Ultralytics YOLO using this dataset.
|
4 |
+
keywords: ImageWoof, image classification, dog breeds, machine learning, deep learning, Ultralytics, YOLO, dataset
|
5 |
+
---
|
6 |
+
|
7 |
+
# ImageWoof Dataset
|
8 |
+
|
9 |
+
The [ImageWoof](https://github.com/fastai/imagenette) dataset is a subset of ImageNet consisting of 10 classes that are challenging to classify, since they're all dog breeds. It was created as a more difficult task for image classification algorithms to solve, with the aim of encouraging the development of more advanced models.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- ImageWoof contains images of 10 different dog breeds: Australian terrier, Border terrier, Samoyed, Beagle, Shih-Tzu, English foxhound, Rhodesian ridgeback, Dingo, Golden retriever, and Old English sheepdog.
|
14 |
+
- The dataset provides images at various resolutions (full size, 320px, 160px), accommodating different computational capabilities and research needs.
|
15 |
+
- It also includes a version with noisy labels, providing a more realistic scenario where labels might not always be reliable.
|
16 |
+
|
17 |
+
## Dataset Structure
|
18 |
+
|
19 |
+
The ImageWoof dataset structure is based on the dog breed classes, with each breed having its own directory of images.
|
20 |
+
|
21 |
+
## Applications
|
22 |
+
|
23 |
+
The ImageWoof dataset is widely used for training and evaluating deep learning models in image classification tasks, especially when it comes to more complex and similar classes. The dataset's challenge lies in the subtle differences between the dog breeds, pushing the limits of a model's performance and generalization.
|
24 |
+
|
25 |
+
## Usage
|
26 |
+
|
27 |
+
To train a CNN model on the ImageWoof dataset for 100 epochs with an image size of 224x224, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
28 |
+
|
29 |
+
!!! Example "Train Example"
|
30 |
+
|
31 |
+
=== "Python"
|
32 |
+
|
33 |
+
```python
|
34 |
+
from ultralytics import YOLO
|
35 |
+
|
36 |
+
# Load a model
|
37 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
38 |
+
|
39 |
+
# Train the model
|
40 |
+
results = model.train(data='imagewoof', epochs=100, imgsz=224)
|
41 |
+
```
|
42 |
+
|
43 |
+
=== "CLI"
|
44 |
+
|
45 |
+
```bash
|
46 |
+
# Start training from a pretrained *.pt model
|
47 |
+
yolo classify train data=imagewoof model=yolov8n-cls.pt epochs=100 imgsz=224
|
48 |
+
```
|
49 |
+
|
50 |
+
## Dataset Variants
|
51 |
+
|
52 |
+
The ImageWoof dataset comes in three different sizes to accommodate various research needs and computational capabilities:
|
53 |
+
|
54 |
+
1. **Full Size (imagewoof)**: This is the original version of the ImageWoof dataset. It contains full-sized images and is ideal for final training and performance benchmarking.
|
55 |
+
|
56 |
+
2. **Medium Size (imagewoof320)**: This version contains images resized to have a maximum edge length of 320 pixels. It's suitable for faster training without significantly sacrificing model performance.
|
57 |
+
|
58 |
+
3. **Small Size (imagewoof160)**: This version contains images resized to have a maximum edge length of 160 pixels. It's designed for rapid prototyping and experimentation where training speed is a priority.
|
59 |
+
|
60 |
+
To use these variants in your training, simply replace 'imagewoof' in the dataset argument with 'imagewoof320' or 'imagewoof160'. For example:
|
61 |
+
|
62 |
+
```python
|
63 |
+
from ultralytics import YOLO
|
64 |
+
|
65 |
+
# Load a model
|
66 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
67 |
+
|
68 |
+
# For medium-sized dataset
|
69 |
+
model.train(data='imagewoof320', epochs=100, imgsz=224)
|
70 |
+
|
71 |
+
# For small-sized dataset
|
72 |
+
model.train(data='imagewoof160', epochs=100, imgsz=224)
|
73 |
+
```
|
74 |
+
|
75 |
+
It's important to note that using smaller images will likely yield lower performance in terms of classification accuracy. However, it's an excellent way to iterate quickly in the early stages of model development and prototyping.
|
76 |
+
|
77 |
+
## Sample Images and Annotations
|
78 |
+
|
79 |
+
The ImageWoof dataset contains colorful images of various dog breeds, providing a challenging dataset for image classification tasks. Here are some examples of images from the dataset:
|
80 |
+
|
81 |
+
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239357533-ec833254-4351-491b-8cb3-59578ea5d0b2.png)
|
82 |
+
|
83 |
+
The example showcases the subtle differences and similarities among the different dog breeds in the ImageWoof dataset, highlighting the complexity and difficulty of the classification task.
|
84 |
+
|
85 |
+
## Citations and Acknowledgments
|
86 |
+
|
87 |
+
If you use the ImageWoof dataset in your research or development work, please make sure to acknowledge the creators of the dataset by linking to the [official dataset repository](https://github.com/fastai/imagenette).
|
88 |
+
|
89 |
+
We would like to acknowledge the FastAI team for creating and maintaining the ImageWoof dataset as a valuable resource for the machine learning and computer vision research community. For more information about the ImageWoof dataset, visit the [ImageWoof dataset repository](https://github.com/fastai/imagenette).
|
docs/en/datasets/classify/index.md
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore image classification datasets supported by Ultralytics, learn the standard dataset format, and set up your own dataset for training models.
|
4 |
+
keywords: Ultralytics, image classification, dataset, machine learning, CIFAR-10, ImageNet, MNIST, torchvision
|
5 |
+
---
|
6 |
+
|
7 |
+
# Image Classification Datasets Overview
|
8 |
+
|
9 |
+
## Dataset format
|
10 |
+
|
11 |
+
The folder structure for classification datasets in torchvision typically follows a standard format:
|
12 |
+
|
13 |
+
```
|
14 |
+
root/
|
15 |
+
|-- class1/
|
16 |
+
| |-- img1.jpg
|
17 |
+
| |-- img2.jpg
|
18 |
+
| |-- ...
|
19 |
+
|
|
20 |
+
|-- class2/
|
21 |
+
| |-- img1.jpg
|
22 |
+
| |-- img2.jpg
|
23 |
+
| |-- ...
|
24 |
+
|
|
25 |
+
|-- class3/
|
26 |
+
| |-- img1.jpg
|
27 |
+
| |-- img2.jpg
|
28 |
+
| |-- ...
|
29 |
+
|
|
30 |
+
|-- ...
|
31 |
+
```
|
32 |
+
|
33 |
+
In this folder structure, the `root` directory contains one subdirectory for each class in the dataset. Each subdirectory is named after the corresponding class and contains all the images for that class. Each image file is named uniquely and is typically in a common image file format such as JPEG or PNG.
|
34 |
+
|
35 |
+
**Example**
|
36 |
+
|
37 |
+
For example, in the CIFAR10 dataset, the folder structure would look like this:
|
38 |
+
|
39 |
+
```
|
40 |
+
cifar-10-/
|
41 |
+
|
|
42 |
+
|-- train/
|
43 |
+
| |-- airplane/
|
44 |
+
| | |-- 10008_airplane.png
|
45 |
+
| | |-- 10009_airplane.png
|
46 |
+
| | |-- ...
|
47 |
+
| |
|
48 |
+
| |-- automobile/
|
49 |
+
| | |-- 1000_automobile.png
|
50 |
+
| | |-- 1001_automobile.png
|
51 |
+
| | |-- ...
|
52 |
+
| |
|
53 |
+
| |-- bird/
|
54 |
+
| | |-- 10014_bird.png
|
55 |
+
| | |-- 10015_bird.png
|
56 |
+
| | |-- ...
|
57 |
+
| |
|
58 |
+
| |-- ...
|
59 |
+
|
|
60 |
+
|-- test/
|
61 |
+
| |-- airplane/
|
62 |
+
| | |-- 10_airplane.png
|
63 |
+
| | |-- 11_airplane.png
|
64 |
+
| | |-- ...
|
65 |
+
| |
|
66 |
+
| |-- automobile/
|
67 |
+
| | |-- 100_automobile.png
|
68 |
+
| | |-- 101_automobile.png
|
69 |
+
| | |-- ...
|
70 |
+
| |
|
71 |
+
| |-- bird/
|
72 |
+
| | |-- 1000_bird.png
|
73 |
+
| | |-- 1001_bird.png
|
74 |
+
| | |-- ...
|
75 |
+
| |
|
76 |
+
| |-- ...
|
77 |
+
```
|
78 |
+
|
79 |
+
In this example, the `train` directory contains subdirectories for each class in the dataset, and each class subdirectory contains all the images for that class. The `test` directory has a similar structure. The `root` directory also contains other files that are part of the CIFAR10 dataset.
|
80 |
+
|
81 |
+
## Usage
|
82 |
+
|
83 |
+
!!! Example
|
84 |
+
|
85 |
+
=== "Python"
|
86 |
+
|
87 |
+
```python
|
88 |
+
from ultralytics import YOLO
|
89 |
+
|
90 |
+
# Load a model
|
91 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
92 |
+
|
93 |
+
# Train the model
|
94 |
+
results = model.train(data='path/to/dataset', epochs=100, imgsz=640)
|
95 |
+
```
|
96 |
+
=== "CLI"
|
97 |
+
|
98 |
+
```bash
|
99 |
+
# Start training from a pretrained *.pt model
|
100 |
+
yolo classify train data=path/to/data model=yolov8n-cls.pt epochs=100 imgsz=640
|
101 |
+
```
|
102 |
+
|
103 |
+
## Supported Datasets
|
104 |
+
|
105 |
+
Ultralytics supports the following datasets with automatic download:
|
106 |
+
|
107 |
+
- [Caltech 101](caltech101.md): A dataset containing images of 101 object categories for image classification tasks.
|
108 |
+
- [Caltech 256](caltech256.md): An extended version of Caltech 101 with 256 object categories and more challenging images.
|
109 |
+
- [CIFAR-10](cifar10.md): A dataset of 60K 32x32 color images in 10 classes, with 6K images per class.
|
110 |
+
- [CIFAR-100](cifar100.md): An extended version of CIFAR-10 with 100 object categories and 600 images per class.
|
111 |
+
- [Fashion-MNIST](fashion-mnist.md): A dataset consisting of 70,000 grayscale images of 10 fashion categories for image classification tasks.
|
112 |
+
- [ImageNet](imagenet.md): A large-scale dataset for object detection and image classification with over 14 million images and 20,000 categories.
|
113 |
+
- [ImageNet-10](imagenet10.md): A smaller subset of ImageNet with 10 categories for faster experimentation and testing.
|
114 |
+
- [Imagenette](imagenette.md): A smaller subset of ImageNet that contains 10 easily distinguishable classes for quicker training and testing.
|
115 |
+
- [Imagewoof](imagewoof.md): A more challenging subset of ImageNet containing 10 dog breed categories for image classification tasks.
|
116 |
+
- [MNIST](mnist.md): A dataset of 70,000 grayscale images of handwritten digits for image classification tasks.
|
117 |
+
|
118 |
+
### Adding your own dataset
|
119 |
+
|
120 |
+
If you have your own dataset and would like to use it for training classification models with Ultralytics, ensure that it follows the format specified above under "Dataset format" and then point your `data` argument to the dataset directory.
|
docs/en/datasets/classify/mnist.md
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Detailed guide on the MNIST Dataset, a benchmark in the machine learning community for image classification tasks. Learn about its structure, usage and application.
|
4 |
+
keywords: MNIST dataset, Ultralytics, image classification, machine learning, computer vision, deep learning, AI, dataset guide
|
5 |
+
---
|
6 |
+
|
7 |
+
# MNIST Dataset
|
8 |
+
|
9 |
+
The [MNIST](http://yann.lecun.com/exdb/mnist/) (Modified National Institute of Standards and Technology) dataset is a large database of handwritten digits that is commonly used for training various image processing systems and machine learning models. It was created by "re-mixing" the samples from NIST's original datasets and has become a benchmark for evaluating the performance of image classification algorithms.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- MNIST contains 60,000 training images and 10,000 testing images of handwritten digits.
|
14 |
+
- The dataset comprises grayscale images of size 28x28 pixels.
|
15 |
+
- The images are normalized to fit into a 28x28 pixel bounding box and anti-aliased, introducing grayscale levels.
|
16 |
+
- MNIST is widely used for training and testing in the field of machine learning, especially for image classification tasks.
|
17 |
+
|
18 |
+
## Dataset Structure
|
19 |
+
|
20 |
+
The MNIST dataset is split into two subsets:
|
21 |
+
|
22 |
+
1. **Training Set**: This subset contains 60,000 images of handwritten digits used for training machine learning models.
|
23 |
+
2. **Testing Set**: This subset consists of 10,000 images used for testing and benchmarking the trained models.
|
24 |
+
|
25 |
+
## Extended MNIST (EMNIST)
|
26 |
+
|
27 |
+
Extended MNIST (EMNIST) is a newer dataset developed and released by NIST to be the successor to MNIST. While MNIST included images only of handwritten digits, EMNIST includes all the images from NIST Special Database 19, which is a large database of handwritten uppercase and lowercase letters as well as digits. The images in EMNIST were converted into the same 28x28 pixel format, by the same process, as were the MNIST images. Accordingly, tools that work with the older, smaller MNIST dataset will likely work unmodified with EMNIST.
|
28 |
+
|
29 |
+
## Applications
|
30 |
+
|
31 |
+
The MNIST dataset is widely used for training and evaluating deep learning models in image classification tasks, such as Convolutional Neural Networks (CNNs), Support Vector Machines (SVMs), and various other machine learning algorithms. The dataset's simple and well-structured format makes it an essential resource for researchers and practitioners in the field of machine learning and computer vision.
|
32 |
+
|
33 |
+
## Usage
|
34 |
+
|
35 |
+
To train a CNN model on the MNIST dataset for 100 epochs with an image size of 32x32, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
36 |
+
|
37 |
+
!!! Example "Train Example"
|
38 |
+
|
39 |
+
=== "Python"
|
40 |
+
|
41 |
+
```python
|
42 |
+
from ultralytics import YOLO
|
43 |
+
|
44 |
+
# Load a model
|
45 |
+
model = YOLO('yolov8n-cls.pt') # load a pretrained model (recommended for training)
|
46 |
+
|
47 |
+
# Train the model
|
48 |
+
results = model.train(data='mnist', epochs=100, imgsz=32)
|
49 |
+
```
|
50 |
+
|
51 |
+
=== "CLI"
|
52 |
+
|
53 |
+
```bash
|
54 |
+
# Start training from a pretrained *.pt model
|
55 |
+
yolo classify train data=mnist model=yolov8n-cls.pt epochs=100 imgsz=32
|
56 |
+
```
|
57 |
+
|
58 |
+
## Sample Images and Annotations
|
59 |
+
|
60 |
+
The MNIST dataset contains grayscale images of handwritten digits, providing a well-structured dataset for image classification tasks. Here are some examples of images from the dataset:
|
61 |
+
|
62 |
+
![Dataset sample image](https://upload.wikimedia.org/wikipedia/commons/2/27/MnistExamples.png)
|
63 |
+
|
64 |
+
The example showcases the variety and complexity of the handwritten digits in the MNIST dataset, highlighting the importance of a diverse dataset for training robust image classification models.
|
65 |
+
|
66 |
+
## Citations and Acknowledgments
|
67 |
+
|
68 |
+
If you use the MNIST dataset in your research or development work, please cite the following paper:
|
69 |
+
|
70 |
+
|
71 |
+
|
72 |
+
!!! Quote ""
|
73 |
+
|
74 |
+
=== "BibTeX"
|
75 |
+
|
76 |
+
```bibtex
|
77 |
+
@article{lecun2010mnist,
|
78 |
+
title={MNIST handwritten digit database},
|
79 |
+
author={LeCun, Yann and Cortes, Corinna and Burges, CJ},
|
80 |
+
journal={ATT Labs [Online]. Available: http://yann.lecun.com/exdb/mnist},
|
81 |
+
volume={2},
|
82 |
+
year={2010}
|
83 |
+
}
|
84 |
+
```
|
85 |
+
|
86 |
+
We would like to acknowledge Yann LeCun, Corinna Cortes, and Christopher J.C. Burges for creating and maintaining the MNIST dataset as a valuable resource for the machine learning and computer vision research community. For more information about the MNIST dataset and its creators, visit the [MNIST dataset website](http://yann.lecun.com/exdb/mnist/).
|
docs/en/datasets/detect/african-wildlife.md
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: African Wildlife objects detection, a leading dataset for object detection in forests, integrates with Ultralytics. Discover ways to use it for training YOLO models.
|
4 |
+
keywords: Ultralytics, African Wildlife dataset, object detection, YOLO, YOLO model training, object tracking, computer vision, deep learning models, forest research, animals tracking
|
5 |
+
---
|
6 |
+
|
7 |
+
# African Wildlife Dataset
|
8 |
+
|
9 |
+
This dataset showcases four common animal classes typically found in South African nature reserves. It includes images of African wildlife such as buffalo, elephant, rhino, and zebra, providing valuable insights into their characteristics. Essential for training computer vision algorithms, this dataset aids in identifying animals in various habitats, from zoos to forests, and supports wildlife research.
|
10 |
+
|
11 |
+
## Dataset Structure
|
12 |
+
|
13 |
+
The African wildlife objects detection dataset is split into three subsets:
|
14 |
+
|
15 |
+
- **Training set**: Contains 1052 images, each with corresponding annotations.
|
16 |
+
- **Validation set**: Includes 225 images, each with paired annotations.
|
17 |
+
- **Testing set**: Comprises 227 images, each with paired annotations.
|
18 |
+
|
19 |
+
## Applications
|
20 |
+
|
21 |
+
This dataset can be applied in various computer vision tasks such as object detection, object tracking, and research. Specifically, it can be used to train and evaluate models for identifying African wildlife objects in images, which can have applications in wildlife conservation, ecological research, and monitoring efforts in natural reserves and protected areas. Additionally, it can serve as a valuable resource for educational purposes, enabling students and researchers to study and understand the characteristics and behaviors of different animal species.
|
22 |
+
|
23 |
+
## Dataset YAML
|
24 |
+
|
25 |
+
A YAML (Yet Another Markup Language) file defines the dataset configuration, including paths, classes, and other pertinent details. For the African wildlife dataset, the `african-wildlife.yaml` file is located at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/african-wildlife.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/african-wildlife.yaml).
|
26 |
+
|
27 |
+
!!! Example "ultralytics/cfg/datasets/african-wildlife.yaml"
|
28 |
+
|
29 |
+
```yaml
|
30 |
+
--8<-- "ultralytics/cfg/datasets/african-wildlife.yaml"
|
31 |
+
```
|
32 |
+
|
33 |
+
## Usage
|
34 |
+
|
35 |
+
To train a YOLOv8n model on the African wildlife dataset for 100 epochs with an image size of 640, use the provided code samples. For a comprehensive list of available parameters, refer to the model's [Training](../../modes/train.md) page.
|
36 |
+
|
37 |
+
!!! Example "Train Example"
|
38 |
+
|
39 |
+
=== "Python"
|
40 |
+
|
41 |
+
```python
|
42 |
+
from ultralytics import YOLO
|
43 |
+
|
44 |
+
# Load a model
|
45 |
+
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
|
46 |
+
|
47 |
+
# Train the model
|
48 |
+
results = model.train(data='african-wildlife.yaml', epochs=100, imgsz=640)
|
49 |
+
```
|
50 |
+
|
51 |
+
=== "CLI"
|
52 |
+
|
53 |
+
```bash
|
54 |
+
# Start training from a pretrained *.pt model
|
55 |
+
yolo detect train data=african-wildlife.yaml model=yolov8n.pt epochs=100 imgsz=640
|
56 |
+
```
|
57 |
+
|
58 |
+
!!! Example "Inference Example"
|
59 |
+
|
60 |
+
=== "Python"
|
61 |
+
|
62 |
+
```python
|
63 |
+
from ultralytics import YOLO
|
64 |
+
|
65 |
+
# Load a model
|
66 |
+
model = YOLO('path/to/best.pt') # load an african-wildlife fine-tuned model
|
67 |
+
|
68 |
+
# Inference using the model
|
69 |
+
results = model.predict("https://ultralytics.com/assets/african-wildlife-sample.jpg")
|
70 |
+
```
|
71 |
+
|
72 |
+
=== "CLI"
|
73 |
+
|
74 |
+
```bash
|
75 |
+
# Start prediction with a finetuned *.pt model
|
76 |
+
yolo detect predict model='path/to/best.pt' imgsz=640 source="https://ultralytics.com/assets/african-wildlife-sample.jpg"
|
77 |
+
```
|
78 |
+
|
79 |
+
|
80 |
+
## Sample Images and Annotations
|
81 |
+
|
82 |
+
The African wildlife dataset comprises a wide variety of images showcasing diverse animal species and their natural habitats. Below are examples of images from the dataset, each accompanied by its corresponding annotations.
|
83 |
+
|
84 |
+
![African wildlife dataset sample image](https://github.com/RizwanMunawar/RizwanMunawar/assets/62513924/919f8190-ccf3-4a96-a5f1-55d9eebc77ec)
|
85 |
+
|
86 |
+
- **Mosaiced Image**: Here, we present a training batch consisting of mosaiced dataset images. Mosaicing, a training technique, combines multiple images into one, enriching batch diversity. This method helps enhance the model's ability to generalize across different object sizes, aspect ratios, and contexts.
|
87 |
+
|
88 |
+
This example illustrates the variety and complexity of images in the African wildlife dataset, emphasizing the benefits of including mosaicing during the training process.
|
89 |
+
|
90 |
+
## Citations and Acknowledgments
|
91 |
+
|
92 |
+
The dataset is released under the [AGPL-3.0 License](https://github.com/ultralytics/ultralytics/blob/main/LICENSE).
|
docs/en/datasets/detect/argoverse.md
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore Argoverse, a comprehensive dataset for autonomous driving tasks including 3D tracking, motion forecasting and depth estimation used in YOLO.
|
4 |
+
keywords: Argoverse dataset, autonomous driving, YOLO, 3D tracking, motion forecasting, LiDAR data, HD maps, ultralytics documentation
|
5 |
+
---
|
6 |
+
|
7 |
+
# Argoverse Dataset
|
8 |
+
|
9 |
+
The [Argoverse](https://www.argoverse.org/) dataset is a collection of data designed to support research in autonomous driving tasks, such as 3D tracking, motion forecasting, and stereo depth estimation. Developed by Argo AI, the dataset provides a wide range of high-quality sensor data, including high-resolution images, LiDAR point clouds, and map data.
|
10 |
+
|
11 |
+
!!! Note
|
12 |
+
|
13 |
+
The Argoverse dataset `*.zip` file required for training was removed from Amazon S3 after the shutdown of Argo AI by Ford, but we have made it available for manual download on [Google Drive](https://drive.google.com/file/d/1st9qW3BeIwQsnR0t8mRpvbsSWIo16ACi/view?usp=drive_link).
|
14 |
+
|
15 |
+
## Key Features
|
16 |
+
|
17 |
+
- Argoverse contains over 290K labeled 3D object tracks and 5 million object instances across 1,263 distinct scenes.
|
18 |
+
- The dataset includes high-resolution camera images, LiDAR point clouds, and richly annotated HD maps.
|
19 |
+
- Annotations include 3D bounding boxes for objects, object tracks, and trajectory information.
|
20 |
+
- Argoverse provides multiple subsets for different tasks, such as 3D tracking, motion forecasting, and stereo depth estimation.
|
21 |
+
|
22 |
+
## Dataset Structure
|
23 |
+
|
24 |
+
The Argoverse dataset is organized into three main subsets:
|
25 |
+
|
26 |
+
1. **Argoverse 3D Tracking**: This subset contains 113 scenes with over 290K labeled 3D object tracks, focusing on 3D object tracking tasks. It includes LiDAR point clouds, camera images, and sensor calibration information.
|
27 |
+
2. **Argoverse Motion Forecasting**: This subset consists of 324K vehicle trajectories collected from 60 hours of driving data, suitable for motion forecasting tasks.
|
28 |
+
3. **Argoverse Stereo Depth Estimation**: This subset is designed for stereo depth estimation tasks and includes over 10K stereo image pairs with corresponding LiDAR point clouds for ground truth depth estimation.
|
29 |
+
|
30 |
+
## Applications
|
31 |
+
|
32 |
+
The Argoverse dataset is widely used for training and evaluating deep learning models in autonomous driving tasks such as 3D object tracking, motion forecasting, and stereo depth estimation. The dataset's diverse set of sensor data, object annotations, and map information makes it a valuable resource for researchers and practitioners in the field of autonomous driving.
|
33 |
+
|
34 |
+
## Dataset YAML
|
35 |
+
|
36 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the Argoverse dataset, the `Argoverse.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/Argoverse.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/Argoverse.yaml).
|
37 |
+
|
38 |
+
!!! Example "ultralytics/cfg/datasets/Argoverse.yaml"
|
39 |
+
|
40 |
+
```yaml
|
41 |
+
--8<-- "ultralytics/cfg/datasets/Argoverse.yaml"
|
42 |
+
```
|
43 |
+
|
44 |
+
## Usage
|
45 |
+
|
46 |
+
To train a YOLOv8n model on the Argoverse dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
47 |
+
|
48 |
+
!!! Example "Train Example"
|
49 |
+
|
50 |
+
=== "Python"
|
51 |
+
|
52 |
+
```python
|
53 |
+
from ultralytics import YOLO
|
54 |
+
|
55 |
+
# Load a model
|
56 |
+
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
|
57 |
+
|
58 |
+
# Train the model
|
59 |
+
results = model.train(data='Argoverse.yaml', epochs=100, imgsz=640)
|
60 |
+
```
|
61 |
+
|
62 |
+
=== "CLI"
|
63 |
+
|
64 |
+
```bash
|
65 |
+
# Start training from a pretrained *.pt model
|
66 |
+
yolo detect train data=Argoverse.yaml model=yolov8n.pt epochs=100 imgsz=640
|
67 |
+
```
|
68 |
+
|
69 |
+
## Sample Data and Annotations
|
70 |
+
|
71 |
+
The Argoverse dataset contains a diverse set of sensor data, including camera images, LiDAR point clouds, and HD map information, providing rich context for autonomous driving tasks. Here are some examples of data from the dataset, along with their corresponding annotations:
|
72 |
+
|
73 |
+
![Dataset sample image](https://www.argoverse.org/assets/images/reference_images/av2_ground_height.png)
|
74 |
+
|
75 |
+
- **Argoverse 3D Tracking**: This image demonstrates an example of 3D object tracking, where objects are annotated with 3D bounding boxes. The dataset provides LiDAR point clouds and camera images to facilitate the development of models for this task.
|
76 |
+
|
77 |
+
The example showcases the variety and complexity of the data in the Argoverse dataset and highlights the importance of high-quality sensor data for autonomous driving tasks.
|
78 |
+
|
79 |
+
## Citations and Acknowledgments
|
80 |
+
|
81 |
+
If you use the Argoverse dataset in your research or development work, please cite the following paper:
|
82 |
+
|
83 |
+
!!! Quote ""
|
84 |
+
|
85 |
+
=== "BibTeX"
|
86 |
+
|
87 |
+
```bibtex
|
88 |
+
@inproceedings{chang2019argoverse,
|
89 |
+
title={Argoverse: 3D Tracking and Forecasting with Rich Maps},
|
90 |
+
author={Chang, Ming-Fang and Lambert, John and Sangkloy, Patsorn and Singh, Jagjeet and Bak, Slawomir and Hartnett, Andrew and Wang, Dequan and Carr, Peter and Lucey, Simon and Ramanan, Deva and others},
|
91 |
+
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
|
92 |
+
pages={8748--8757},
|
93 |
+
year={2019}
|
94 |
+
}
|
95 |
+
```
|
96 |
+
|
97 |
+
We would like to acknowledge Argo AI for creating and maintaining the Argoverse dataset as a valuable resource for the autonomous driving research community. For more information about the Argoverse dataset and its creators, visit the [Argoverse dataset website](https://www.argoverse.org/).
|
docs/en/datasets/detect/brain-tumor.md
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Brain tumor detection, a leading dataset for medical imaging, integrates with Ultralytics. Discover ways to use it for training YOLO models.
|
4 |
+
keywords: Ultralytics, Brain Tumor dataset, object detection, YOLO, YOLO model training, object tracking, computer vision, deep learning models
|
5 |
+
---
|
6 |
+
|
7 |
+
# Brain Tumor Dataset
|
8 |
+
|
9 |
+
A brain tumor detection dataset consists of medical images from MRI or CT scans, containing information about brain tumor presence, location, and characteristics. This dataset is essential for training computer vision algorithms to automate brain tumor identification, aiding in early diagnosis and treatment planning.
|
10 |
+
|
11 |
+
## Dataset Structure
|
12 |
+
|
13 |
+
The brain tumor dataset is divided into two subsets:
|
14 |
+
|
15 |
+
- **Training set**: Consisting of 893 images, each accompanied by corresponding annotations.
|
16 |
+
- **Testing set**: Comprising 223 images, with annotations paired for each one.
|
17 |
+
|
18 |
+
## Applications
|
19 |
+
|
20 |
+
The application of brain tumor detection using computer vision enables early diagnosis, treatment planning, and monitoring of tumor progression. By analyzing medical imaging data like MRI or CT scans, computer vision systems assist in accurately identifying brain tumors, aiding in timely medical intervention and personalized treatment strategies.
|
21 |
+
|
22 |
+
## Dataset YAML
|
23 |
+
|
24 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the brain tumor dataset, the `brain-tumor.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/brain-tumor.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/brain-tumor.yaml).
|
25 |
+
|
26 |
+
!!! Example "ultralytics/cfg/datasets/brain-tumor.yaml"
|
27 |
+
|
28 |
+
```yaml
|
29 |
+
--8<-- "ultralytics/cfg/datasets/brain-tumor.yaml"
|
30 |
+
```
|
31 |
+
|
32 |
+
## Usage
|
33 |
+
|
34 |
+
To train a YOLOv8n model on the brain tumor dataset for 100 epochs with an image size of 640, utilize the provided code snippets. For a detailed list of available arguments, consult the model's [Training](../../modes/train.md) page.
|
35 |
+
|
36 |
+
!!! Example "Train Example"
|
37 |
+
|
38 |
+
=== "Python"
|
39 |
+
|
40 |
+
```python
|
41 |
+
from ultralytics import YOLO
|
42 |
+
|
43 |
+
# Load a model
|
44 |
+
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
|
45 |
+
|
46 |
+
# Train the model
|
47 |
+
results = model.train(data='brain-tumor.yaml', epochs=100, imgsz=640)
|
48 |
+
```
|
49 |
+
|
50 |
+
=== "CLI"
|
51 |
+
|
52 |
+
```bash
|
53 |
+
# Start training from a pretrained *.pt model
|
54 |
+
yolo detect train data=brain-tumor.yaml model=yolov8n.pt epochs=100 imgsz=640
|
55 |
+
```
|
56 |
+
|
57 |
+
!!! Example "Inference Example"
|
58 |
+
|
59 |
+
=== "Python"
|
60 |
+
|
61 |
+
```python
|
62 |
+
from ultralytics import YOLO
|
63 |
+
|
64 |
+
# Load a model
|
65 |
+
model = YOLO('path/to/best.pt') # load a brain-tumor fine-tuned model
|
66 |
+
|
67 |
+
# Inference using the model
|
68 |
+
results = model.predict("https://ultralytics.com/assets/brain-tumor-sample.jpg")
|
69 |
+
```
|
70 |
+
|
71 |
+
=== "CLI"
|
72 |
+
|
73 |
+
```bash
|
74 |
+
# Start prediction with a finetuned *.pt model
|
75 |
+
yolo detect predict model='path/to/best.pt' imgsz=640 source="https://ultralytics.com/assets/brain-tumor-sample.jpg"
|
76 |
+
```
|
77 |
+
|
78 |
+
|
79 |
+
## Sample Images and Annotations
|
80 |
+
|
81 |
+
The brain tumor dataset encompasses a wide array of images featuring diverse object categories and intricate scenes. Presented below are examples of images from the dataset, accompanied by their respective annotations:
|
82 |
+
|
83 |
+
![Brain tumor dataset sample image](https://github.com/RizwanMunawar/RizwanMunawar/assets/62513924/1741cbf5-2462-4e9a-b0b9-4a07d76cf7ef)
|
84 |
+
|
85 |
+
- **Mosaiced Image**: Displayed here is a training batch comprising mosaiced dataset images. Mosaicing, a training technique, consolidates multiple images into one, enhancing batch diversity. This approach aids in improving the model's capacity to generalize across various object sizes, aspect ratios, and contexts.
|
86 |
+
|
87 |
+
This example highlights the diversity and intricacy of images within the brain tumor dataset, underscoring the advantages of incorporating mosaicing during the training phase.
|
88 |
+
|
89 |
+
## Citations and Acknowledgments
|
90 |
+
|
91 |
+
The dataset is released under the [AGPL-3.0 License](https://github.com/ultralytics/ultralytics/blob/main/LICENSE).
|
docs/en/datasets/detect/coco.md
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Learn how COCO, a leading dataset for object detection and segmentation, integrates with Ultralytics. Discover ways to use it for training YOLO models.
|
4 |
+
keywords: Ultralytics, COCO dataset, object detection, YOLO, YOLO model training, image segmentation, computer vision, deep learning models
|
5 |
+
---
|
6 |
+
|
7 |
+
# COCO Dataset
|
8 |
+
|
9 |
+
The [COCO](https://cocodataset.org/#home) (Common Objects in Context) dataset is a large-scale object detection, segmentation, and captioning dataset. It is designed to encourage research on a wide variety of object categories and is commonly used for benchmarking computer vision models. It is an essential dataset for researchers and developers working on object detection, segmentation, and pose estimation tasks.
|
10 |
+
|
11 |
+
<p align="center">
|
12 |
+
<br>
|
13 |
+
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/uDrn9QZJ2lk"
|
14 |
+
title="YouTube video player" frameborder="0"
|
15 |
+
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
|
16 |
+
allowfullscreen>
|
17 |
+
</iframe>
|
18 |
+
<br>
|
19 |
+
<strong>Watch:</strong> Ultralytics COCO Dataset Overview
|
20 |
+
</p>
|
21 |
+
|
22 |
+
## Key Features
|
23 |
+
|
24 |
+
- COCO contains 330K images, with 200K images having annotations for object detection, segmentation, and captioning tasks.
|
25 |
+
- The dataset comprises 80 object categories, including common objects like cars, bicycles, and animals, as well as more specific categories such as umbrellas, handbags, and sports equipment.
|
26 |
+
- Annotations include object bounding boxes, segmentation masks, and captions for each image.
|
27 |
+
- COCO provides standardized evaluation metrics like mean Average Precision (mAP) for object detection, and mean Average Recall (mAR) for segmentation tasks, making it suitable for comparing model performance.
|
28 |
+
|
29 |
+
## Dataset Structure
|
30 |
+
|
31 |
+
The COCO dataset is split into three subsets:
|
32 |
+
|
33 |
+
1. **Train2017**: This subset contains 118K images for training object detection, segmentation, and captioning models.
|
34 |
+
2. **Val2017**: This subset has 5K images used for validation purposes during model training.
|
35 |
+
3. **Test2017**: This subset consists of 20K images used for testing and benchmarking the trained models. Ground truth annotations for this subset are not publicly available, and the results are submitted to the [COCO evaluation server](https://codalab.lisn.upsaclay.fr/competitions/7384) for performance evaluation.
|
36 |
+
|
37 |
+
## Applications
|
38 |
+
|
39 |
+
The COCO dataset is widely used for training and evaluating deep learning models in object detection (such as YOLO, Faster R-CNN, and SSD), instance segmentation (such as Mask R-CNN), and keypoint detection (such as OpenPose). The dataset's diverse set of object categories, large number of annotated images, and standardized evaluation metrics make it an essential resource for computer vision researchers and practitioners.
|
40 |
+
|
41 |
+
## Dataset YAML
|
42 |
+
|
43 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the COCO dataset, the `coco.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml).
|
44 |
+
|
45 |
+
!!! Example "ultralytics/cfg/datasets/coco.yaml"
|
46 |
+
|
47 |
+
```yaml
|
48 |
+
--8<-- "ultralytics/cfg/datasets/coco.yaml"
|
49 |
+
```
|
50 |
+
|
51 |
+
## Usage
|
52 |
+
|
53 |
+
To train a YOLOv8n model on the COCO dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
54 |
+
|
55 |
+
!!! Example "Train Example"
|
56 |
+
|
57 |
+
=== "Python"
|
58 |
+
|
59 |
+
```python
|
60 |
+
from ultralytics import YOLO
|
61 |
+
|
62 |
+
# Load a model
|
63 |
+
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
|
64 |
+
|
65 |
+
# Train the model
|
66 |
+
results = model.train(data='coco.yaml', epochs=100, imgsz=640)
|
67 |
+
```
|
68 |
+
|
69 |
+
=== "CLI"
|
70 |
+
|
71 |
+
```bash
|
72 |
+
# Start training from a pretrained *.pt model
|
73 |
+
yolo detect train data=coco.yaml model=yolov8n.pt epochs=100 imgsz=640
|
74 |
+
```
|
75 |
+
|
76 |
+
## Sample Images and Annotations
|
77 |
+
|
78 |
+
The COCO dataset contains a diverse set of images with various object categories and complex scenes. Here are some examples of images from the dataset, along with their corresponding annotations:
|
79 |
+
|
80 |
+
![Dataset sample image](https://user-images.githubusercontent.com/26833433/236811818-5b566576-1e92-42fa-9462-4b6a848abe89.jpg)
|
81 |
+
|
82 |
+
- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts.
|
83 |
+
|
84 |
+
The example showcases the variety and complexity of the images in the COCO dataset and the benefits of using mosaicing during the training process.
|
85 |
+
|
86 |
+
## Citations and Acknowledgments
|
87 |
+
|
88 |
+
If you use the COCO dataset in your research or development work, please cite the following paper:
|
89 |
+
|
90 |
+
!!! Quote ""
|
91 |
+
|
92 |
+
=== "BibTeX"
|
93 |
+
|
94 |
+
```bibtex
|
95 |
+
@misc{lin2015microsoft,
|
96 |
+
title={Microsoft COCO: Common Objects in Context},
|
97 |
+
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
|
98 |
+
year={2015},
|
99 |
+
eprint={1405.0312},
|
100 |
+
archivePrefix={arXiv},
|
101 |
+
primaryClass={cs.CV}
|
102 |
+
}
|
103 |
+
```
|
104 |
+
|
105 |
+
We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).
|
docs/en/datasets/detect/coco8.md
ADDED
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Discover the benefits of using the practical and diverse COCO8 dataset for object detection model testing. Learn to configure and use it via Ultralytics HUB and YOLOv8.
|
4 |
+
keywords: Ultralytics, COCO8 dataset, object detection, model testing, dataset configuration, detection approaches, sanity check, training pipelines, YOLOv8
|
5 |
+
---
|
6 |
+
|
7 |
+
# COCO8 Dataset
|
8 |
+
|
9 |
+
## Introduction
|
10 |
+
|
11 |
+
[Ultralytics](https://ultralytics.com) COCO8 is a small, but versatile object detection dataset composed of the first 8 images of the COCO train 2017 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging object detection models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets.
|
12 |
+
|
13 |
+
<p align="center">
|
14 |
+
<br>
|
15 |
+
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/uDrn9QZJ2lk"
|
16 |
+
title="YouTube video player" frameborder="0"
|
17 |
+
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
|
18 |
+
allowfullscreen>
|
19 |
+
</iframe>
|
20 |
+
<br>
|
21 |
+
<strong>Watch:</strong> Ultralytics COCO Dataset Overview
|
22 |
+
</p>
|
23 |
+
|
24 |
+
This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com) and [YOLOv8](https://github.com/ultralytics/ultralytics).
|
25 |
+
|
26 |
+
## Dataset YAML
|
27 |
+
|
28 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the COCO8 dataset, the `coco8.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8.yaml).
|
29 |
+
|
30 |
+
!!! Example "ultralytics/cfg/datasets/coco8.yaml"
|
31 |
+
|
32 |
+
```yaml
|
33 |
+
--8<-- "ultralytics/cfg/datasets/coco8.yaml"
|
34 |
+
```
|
35 |
+
|
36 |
+
## Usage
|
37 |
+
|
38 |
+
To train a YOLOv8n model on the COCO8 dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
39 |
+
|
40 |
+
!!! Example "Train Example"
|
41 |
+
|
42 |
+
=== "Python"
|
43 |
+
|
44 |
+
```python
|
45 |
+
from ultralytics import YOLO
|
46 |
+
|
47 |
+
# Load a model
|
48 |
+
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
|
49 |
+
|
50 |
+
# Train the model
|
51 |
+
results = model.train(data='coco8.yaml', epochs=100, imgsz=640)
|
52 |
+
```
|
53 |
+
|
54 |
+
=== "CLI"
|
55 |
+
|
56 |
+
```bash
|
57 |
+
# Start training from a pretrained *.pt model
|
58 |
+
yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640
|
59 |
+
```
|
60 |
+
|
61 |
+
## Sample Images and Annotations
|
62 |
+
|
63 |
+
Here are some examples of images from the COCO8 dataset, along with their corresponding annotations:
|
64 |
+
|
65 |
+
<img src="https://user-images.githubusercontent.com/26833433/236818348-e6260a3d-0454-436b-83a9-de366ba07235.jpg" alt="Dataset sample image" width="800">
|
66 |
+
|
67 |
+
- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts.
|
68 |
+
|
69 |
+
The example showcases the variety and complexity of the images in the COCO8 dataset and the benefits of using mosaicing during the training process.
|
70 |
+
|
71 |
+
## Citations and Acknowledgments
|
72 |
+
|
73 |
+
If you use the COCO dataset in your research or development work, please cite the following paper:
|
74 |
+
|
75 |
+
!!! Quote ""
|
76 |
+
|
77 |
+
=== "BibTeX"
|
78 |
+
|
79 |
+
```bibtex
|
80 |
+
@misc{lin2015microsoft,
|
81 |
+
title={Microsoft COCO: Common Objects in Context},
|
82 |
+
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
|
83 |
+
year={2015},
|
84 |
+
eprint={1405.0312},
|
85 |
+
archivePrefix={arXiv},
|
86 |
+
primaryClass={cs.CV}
|
87 |
+
}
|
88 |
+
```
|
89 |
+
|
90 |
+
We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).
|
docs/en/datasets/detect/globalwheat2020.md
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Understand how to utilize the vast Global Wheat Head Dataset for building wheat head detection models. Features, structure, applications, usage, sample data, and citation.
|
4 |
+
keywords: Ultralytics, YOLO, Global Wheat Head Dataset, wheat head detection, plant phenotyping, crop management, deep learning, outdoor images, annotations, YAML configuration
|
5 |
+
---
|
6 |
+
|
7 |
+
# Global Wheat Head Dataset
|
8 |
+
|
9 |
+
The [Global Wheat Head Dataset](https://www.global-wheat.com/) is a collection of images designed to support the development of accurate wheat head detection models for applications in wheat phenotyping and crop management. Wheat heads, also known as spikes, are the grain-bearing parts of the wheat plant. Accurate estimation of wheat head density and size is essential for assessing crop health, maturity, and yield potential. The dataset, created by a collaboration of nine research institutes from seven countries, covers multiple growing regions to ensure models generalize well across different environments.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- The dataset contains over 3,000 training images from Europe (France, UK, Switzerland) and North America (Canada).
|
14 |
+
- It includes approximately 1,000 test images from Australia, Japan, and China.
|
15 |
+
- Images are outdoor field images, capturing the natural variability in wheat head appearances.
|
16 |
+
- Annotations include wheat head bounding boxes to support object detection tasks.
|
17 |
+
|
18 |
+
## Dataset Structure
|
19 |
+
|
20 |
+
The Global Wheat Head Dataset is organized into two main subsets:
|
21 |
+
|
22 |
+
1. **Training Set**: This subset contains over 3,000 images from Europe and North America. The images are labeled with wheat head bounding boxes, providing ground truth for training object detection models.
|
23 |
+
2. **Test Set**: This subset consists of approximately 1,000 images from Australia, Japan, and China. These images are used for evaluating the performance of trained models on unseen genotypes, environments, and observational conditions.
|
24 |
+
|
25 |
+
## Applications
|
26 |
+
|
27 |
+
The Global Wheat Head Dataset is widely used for training and evaluating deep learning models in wheat head detection tasks. The dataset's diverse set of images, capturing a wide range of appearances, environments, and conditions, makes it a valuable resource for researchers and practitioners in the field of plant phenotyping and crop management.
|
28 |
+
|
29 |
+
## Dataset YAML
|
30 |
+
|
31 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the Global Wheat Head Dataset, the `GlobalWheat2020.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/GlobalWheat2020.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/GlobalWheat2020.yaml).
|
32 |
+
|
33 |
+
!!! Example "ultralytics/cfg/datasets/GlobalWheat2020.yaml"
|
34 |
+
|
35 |
+
```yaml
|
36 |
+
--8<-- "ultralytics/cfg/datasets/GlobalWheat2020.yaml"
|
37 |
+
```
|
38 |
+
|
39 |
+
## Usage
|
40 |
+
|
41 |
+
To train a YOLOv8n model on the Global Wheat Head Dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
42 |
+
|
43 |
+
!!! Example "Train Example"
|
44 |
+
|
45 |
+
=== "Python"
|
46 |
+
|
47 |
+
```python
|
48 |
+
from ultralytics import YOLO
|
49 |
+
|
50 |
+
# Load a model
|
51 |
+
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
|
52 |
+
|
53 |
+
# Train the model
|
54 |
+
results = model.train(data='GlobalWheat2020.yaml', epochs=100, imgsz=640)
|
55 |
+
```
|
56 |
+
|
57 |
+
=== "CLI"
|
58 |
+
|
59 |
+
```bash
|
60 |
+
# Start training from a pretrained *.pt model
|
61 |
+
yolo detect train data=GlobalWheat2020.yaml model=yolov8n.pt epochs=100 imgsz=640
|
62 |
+
```
|
63 |
+
|
64 |
+
## Sample Data and Annotations
|
65 |
+
|
66 |
+
The Global Wheat Head Dataset contains a diverse set of outdoor field images, capturing the natural variability in wheat head appearances, environments, and conditions. Here are some examples of data from the dataset, along with their corresponding annotations:
|
67 |
+
|
68 |
+
![Dataset sample image](https://i.ytimg.com/vi/yqvMuw-uedU/maxresdefault.jpg)
|
69 |
+
|
70 |
+
- **Wheat Head Detection**: This image demonstrates an example of wheat head detection, where wheat heads are annotated with bounding boxes. The dataset provides a variety of images to facilitate the development of models for this task.
|
71 |
+
|
72 |
+
The example showcases the variety and complexity of the data in the Global Wheat Head Dataset and highlights the importance of accurate wheat head detection for applications in wheat phenotyping and crop management.
|
73 |
+
|
74 |
+
## Citations and Acknowledgments
|
75 |
+
|
76 |
+
If you use the Global Wheat Head Dataset in your research or development work, please cite the following paper:
|
77 |
+
|
78 |
+
!!! Quote ""
|
79 |
+
|
80 |
+
=== "BibTeX"
|
81 |
+
|
82 |
+
```bibtex
|
83 |
+
@article{david2020global,
|
84 |
+
title={Global Wheat Head Detection (GWHD) Dataset: A Large and Diverse Dataset of High-Resolution RGB-Labelled Images to Develop and Benchmark Wheat Head Detection Methods},
|
85 |
+
author={David, Etienne and Madec, Simon and Sadeghi-Tehran, Pouria and Aasen, Helge and Zheng, Bangyou and Liu, Shouyang and Kirchgessner, Norbert and Ishikawa, Goro and Nagasawa, Koichi and Badhon, Minhajul and others},
|
86 |
+
journal={arXiv preprint arXiv:2005.02162},
|
87 |
+
year={2020}
|
88 |
+
}
|
89 |
+
```
|
90 |
+
|
91 |
+
We would like to acknowledge the researchers and institutions that contributed to the creation and maintenance of the Global Wheat Head Dataset as a valuable resource for the plant phenotyping and crop management research community. For more information about the dataset and its creators, visit the [Global Wheat Head Dataset website](https://www.global-wheat.com/).
|
docs/en/datasets/detect/index.md
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Navigate through supported dataset formats, methods to utilize them and how to add your own datasets. Get insights on porting or converting label formats.
|
4 |
+
keywords: Ultralytics, YOLO, datasets, object detection, dataset formats, label formats, data conversion
|
5 |
+
---
|
6 |
+
|
7 |
+
# Object Detection Datasets Overview
|
8 |
+
|
9 |
+
Training a robust and accurate object detection model requires a comprehensive dataset. This guide introduces various formats of datasets that are compatible with the Ultralytics YOLO model and provides insights into their structure, usage, and how to convert between different formats.
|
10 |
+
|
11 |
+
## Supported Dataset Formats
|
12 |
+
|
13 |
+
### Ultralytics YOLO format
|
14 |
+
|
15 |
+
The Ultralytics YOLO format is a dataset configuration format that allows you to define the dataset root directory, the relative paths to training/validation/testing image directories or `*.txt` files containing image paths, and a dictionary of class names. Here is an example:
|
16 |
+
|
17 |
+
```yaml
|
18 |
+
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
19 |
+
path: ../datasets/coco8 # dataset root dir
|
20 |
+
train: images/train # train images (relative to 'path') 4 images
|
21 |
+
val: images/val # val images (relative to 'path') 4 images
|
22 |
+
test: # test images (optional)
|
23 |
+
|
24 |
+
# Classes (80 COCO classes)
|
25 |
+
names:
|
26 |
+
0: person
|
27 |
+
1: bicycle
|
28 |
+
2: car
|
29 |
+
# ...
|
30 |
+
77: teddy bear
|
31 |
+
78: hair drier
|
32 |
+
79: toothbrush
|
33 |
+
```
|
34 |
+
|
35 |
+
Labels for this format should be exported to YOLO format with one `*.txt` file per image. If there are no objects in an image, no `*.txt` file is required. The `*.txt` file should be formatted with one row per object in `class x_center y_center width height` format. Box coordinates must be in **normalized xywh** format (from 0 to 1). If your boxes are in pixels, you should divide `x_center` and `width` by image width, and `y_center` and `height` by image height. Class numbers should be zero-indexed (start with 0).
|
36 |
+
|
37 |
+
<p align="center"><img width="750" src="https://user-images.githubusercontent.com/26833433/91506361-c7965000-e886-11ea-8291-c72b98c25eec.jpg" alt="Example labelled image"></p>
|
38 |
+
|
39 |
+
The label file corresponding to the above image contains 2 persons (class `0`) and a tie (class `27`):
|
40 |
+
|
41 |
+
<p align="center"><img width="428" src="https://user-images.githubusercontent.com/26833433/112467037-d2568c00-8d66-11eb-8796-55402ac0d62f.png" alt="Example label file"></p>
|
42 |
+
|
43 |
+
When using the Ultralytics YOLO format, organize your training and validation images and labels as shown in the example below.
|
44 |
+
|
45 |
+
<p align="center"><img width="700" src="https://user-images.githubusercontent.com/26833433/134436012-65111ad1-9541-4853-81a6-f19a3468b75f.png" alt="Example dataset directory structure"></p>
|
46 |
+
|
47 |
+
## Usage
|
48 |
+
|
49 |
+
Here's how you can use these formats to train your model:
|
50 |
+
|
51 |
+
!!! Example
|
52 |
+
|
53 |
+
=== "Python"
|
54 |
+
|
55 |
+
```python
|
56 |
+
from ultralytics import YOLO
|
57 |
+
|
58 |
+
# Load a model
|
59 |
+
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
|
60 |
+
|
61 |
+
# Train the model
|
62 |
+
results = model.train(data='coco8.yaml', epochs=100, imgsz=640)
|
63 |
+
```
|
64 |
+
=== "CLI"
|
65 |
+
|
66 |
+
```bash
|
67 |
+
# Start training from a pretrained *.pt model
|
68 |
+
yolo detect train data=coco8.yaml model=yolov8n.pt epochs=100 imgsz=640
|
69 |
+
```
|
70 |
+
|
71 |
+
## Supported Datasets
|
72 |
+
|
73 |
+
Here is a list of the supported datasets and a brief description for each:
|
74 |
+
|
75 |
+
- [**Argoverse**](argoverse.md): A collection of sensor data collected from autonomous vehicles. It contains 3D tracking annotations for car objects.
|
76 |
+
- [**COCO**](coco.md): Common Objects in Context (COCO) is a large-scale object detection, segmentation, and captioning dataset with 80 object categories.
|
77 |
+
- [**COCO8**](coco8.md): A smaller subset of the COCO dataset, COCO8 is more lightweight and faster to train.
|
78 |
+
- [**GlobalWheat2020**](globalwheat2020.md): A dataset containing images of wheat heads for the Global Wheat Challenge 2020.
|
79 |
+
- [**Objects365**](objects365.md): A large-scale object detection dataset with 365 object categories and 2 million images, aimed at advancing object detection research.
|
80 |
+
- [**OpenImagesV7**](open-images-v7.md): A comprehensive dataset by Google with 1.7M train images and 42k validation images.
|
81 |
+
- [**SKU-110K**](sku-110k.md): A dataset containing images of densely packed retail products, intended for retail environment object detection.
|
82 |
+
- [**VisDrone**](visdrone.md): A dataset focusing on drone-based images, containing various object categories like cars, pedestrians, and cyclists.
|
83 |
+
- [**VOC**](voc.md): PASCAL VOC is a popular object detection dataset with 20 object categories including vehicles, animals, and furniture.
|
84 |
+
- [**xView**](xview.md): A dataset containing high-resolution satellite imagery, designed for the detection of various object classes in overhead views.
|
85 |
+
- [**Brain-tumor**](brain-tumor.md): This dataset comprises MRI or CT scan images containing information about brain tumor presence, location, and characteristics. It plays a crucial role in training computer vision models to automate tumor identification, facilitating early diagnosis and treatment planning.
|
86 |
+
- [**African-wildlife**](african-wildlife.md): Featuring images of African wildlife such as buffalo, elephant, rhino, and zebra, this dataset is instrumental in training computer vision models. It is indispensable for identifying animals across different habitats and contributes significantly to wildlife research endeavors.
|
87 |
+
|
88 |
+
### Adding your own dataset
|
89 |
+
|
90 |
+
If you have your own dataset and would like to use it for training detection models with Ultralytics YOLO format, ensure that it follows the format specified above under "Ultralytics YOLO format". Convert your annotations to the required format and specify the paths, number of classes, and class names in the YAML configuration file.
|
91 |
+
|
92 |
+
## Port or Convert Label Formats
|
93 |
+
|
94 |
+
### COCO Dataset Format to YOLO Format
|
95 |
+
|
96 |
+
You can easily convert labels from the popular COCO dataset format to the YOLO format using the following code snippet:
|
97 |
+
|
98 |
+
!!! Example
|
99 |
+
|
100 |
+
=== "Python"
|
101 |
+
|
102 |
+
```python
|
103 |
+
from ultralytics.data.converter import convert_coco
|
104 |
+
|
105 |
+
convert_coco(labels_dir='path/to/coco/annotations/')
|
106 |
+
```
|
107 |
+
|
108 |
+
This conversion tool can be used to convert the COCO dataset or any dataset in the COCO format to the Ultralytics YOLO format.
|
109 |
+
|
110 |
+
Remember to double-check if the dataset you want to use is compatible with your model and follows the necessary format conventions. Properly formatted datasets are crucial for training successful object detection models.
|
docs/en/datasets/detect/objects365.md
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Discover the Objects365 dataset, a wide-scale, high-quality resource for object detection research. Learn to use it with the Ultralytics YOLO model.
|
4 |
+
keywords: Objects365, object detection, Ultralytics, dataset, YOLO, bounding boxes, annotations, computer vision, deep learning, training models
|
5 |
+
---
|
6 |
+
|
7 |
+
# Objects365 Dataset
|
8 |
+
|
9 |
+
The [Objects365](https://www.objects365.org/) dataset is a large-scale, high-quality dataset designed to foster object detection research with a focus on diverse objects in the wild. Created by a team of [Megvii](https://en.megvii.com/) researchers, the dataset offers a wide range of high-resolution images with a comprehensive set of annotated bounding boxes covering 365 object categories.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- Objects365 contains 365 object categories, with 2 million images and over 30 million bounding boxes.
|
14 |
+
- The dataset includes diverse objects in various scenarios, providing a rich and challenging benchmark for object detection tasks.
|
15 |
+
- Annotations include bounding boxes for objects, making it suitable for training and evaluating object detection models.
|
16 |
+
- Objects365 pre-trained models significantly outperform ImageNet pre-trained models, leading to better generalization on various tasks.
|
17 |
+
|
18 |
+
## Dataset Structure
|
19 |
+
|
20 |
+
The Objects365 dataset is organized into a single set of images with corresponding annotations:
|
21 |
+
|
22 |
+
- **Images**: The dataset includes 2 million high-resolution images, each containing a variety of objects across 365 categories.
|
23 |
+
- **Annotations**: The images are annotated with over 30 million bounding boxes, providing comprehensive ground truth information for object detection tasks.
|
24 |
+
|
25 |
+
## Applications
|
26 |
+
|
27 |
+
The Objects365 dataset is widely used for training and evaluating deep learning models in object detection tasks. The dataset's diverse set of object categories and high-quality annotations make it a valuable resource for researchers and practitioners in the field of computer vision.
|
28 |
+
|
29 |
+
## Dataset YAML
|
30 |
+
|
31 |
+
A YAML (YAML Ain't Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. For the case of the Objects365 Dataset, the `Objects365.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/Objects365.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/Objects365.yaml).
|
32 |
+
|
33 |
+
!!! Example "ultralytics/cfg/datasets/Objects365.yaml"
|
34 |
+
|
35 |
+
```yaml
|
36 |
+
--8<-- "ultralytics/cfg/datasets/Objects365.yaml"
|
37 |
+
```
|
38 |
+
|
39 |
+
## Usage
|
40 |
+
|
41 |
+
To train a YOLOv8n model on the Objects365 dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
42 |
+
|
43 |
+
!!! Example "Train Example"
|
44 |
+
|
45 |
+
=== "Python"
|
46 |
+
|
47 |
+
```python
|
48 |
+
from ultralytics import YOLO
|
49 |
+
|
50 |
+
# Load a model
|
51 |
+
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
|
52 |
+
|
53 |
+
# Train the model
|
54 |
+
results = model.train(data='Objects365.yaml', epochs=100, imgsz=640)
|
55 |
+
```
|
56 |
+
|
57 |
+
=== "CLI"
|
58 |
+
|
59 |
+
```bash
|
60 |
+
# Start training from a pretrained *.pt model
|
61 |
+
yolo detect train data=Objects365.yaml model=yolov8n.pt epochs=100 imgsz=640
|
62 |
+
```
|
63 |
+
|
64 |
+
## Sample Data and Annotations
|
65 |
+
|
66 |
+
The Objects365 dataset contains a diverse set of high-resolution images with objects from 365 categories, providing rich context for object detection tasks. Here are some examples of the images in the dataset:
|
67 |
+
|
68 |
+
![Dataset sample image](https://user-images.githubusercontent.com/26833433/238215467-caf757dd-0b87-4b0d-bb19-d94a547f7fbf.jpg)
|
69 |
+
|
70 |
+
- **Objects365**: This image demonstrates an example of object detection, where objects are annotated with bounding boxes. The dataset provides a wide range of images to facilitate the development of models for this task.
|
71 |
+
|
72 |
+
The example showcases the variety and complexity of the data in the Objects365 dataset and highlights the importance of accurate object detection for computer vision applications.
|
73 |
+
|
74 |
+
## Citations and Acknowledgments
|
75 |
+
|
76 |
+
If you use the Objects365 dataset in your research or development work, please cite the following paper:
|
77 |
+
|
78 |
+
!!! Quote ""
|
79 |
+
|
80 |
+
=== "BibTeX"
|
81 |
+
|
82 |
+
```bibtex
|
83 |
+
@inproceedings{shao2019objects365,
|
84 |
+
title={Objects365: A Large-scale, High-quality Dataset for Object Detection},
|
85 |
+
author={Shao, Shuai and Li, Zeming and Zhang, Tianyuan and Peng, Chao and Yu, Gang and Li, Jing and Zhang, Xiangyu and Sun, Jian},
|
86 |
+
booktitle={Proceedings of the IEEE/CVF International Conference on Computer Vision},
|
87 |
+
pages={8425--8434},
|
88 |
+
year={2019}
|
89 |
+
}
|
90 |
+
```
|
91 |
+
|
92 |
+
We would like to acknowledge the team of researchers who created and maintain the Objects365 dataset as a valuable resource for the computer vision research community. For more information about the Objects365 dataset and its creators, visit the [Objects365 dataset website](https://www.objects365.org/).
|
docs/en/datasets/detect/open-images-v7.md
ADDED
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Dive into Google's Open Images V7, a comprehensive dataset offering a broad scope for computer vision research. Understand its usage with deep learning models.
|
4 |
+
keywords: Open Images V7, object detection, segmentation masks, visual relationships, localized narratives, computer vision, deep learning, annotations, bounding boxes
|
5 |
+
---
|
6 |
+
|
7 |
+
# Open Images V7 Dataset
|
8 |
+
|
9 |
+
[Open Images V7](https://storage.googleapis.com/openimages/web/index.html) is a versatile and expansive dataset championed by Google. Aimed at propelling research in the realm of computer vision, it boasts a vast collection of images annotated with a plethora of data, including image-level labels, object bounding boxes, object segmentation masks, visual relationships, and localized narratives.
|
10 |
+
|
11 |
+
## Open Images V7 Pretrained Models
|
12 |
+
|
13 |
+
| Model | size<br><sup>(pixels) | mAP<sup>val<br>50-95 | Speed<br><sup>CPU ONNX<br>(ms) | Speed<br><sup>A100 TensorRT<br>(ms) | params<br><sup>(M) | FLOPs<br><sup>(B) |
|
14 |
+
|-------------------------------------------------------------------------------------------|-----------------------|----------------------|--------------------------------|-------------------------------------|--------------------|-------------------|
|
15 |
+
| [YOLOv8n](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8n-oiv7.pt) | 640 | 18.4 | 142.4 | 1.21 | 3.5 | 10.5 |
|
16 |
+
| [YOLOv8s](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8s-oiv7.pt) | 640 | 27.7 | 183.1 | 1.40 | 11.4 | 29.7 |
|
17 |
+
| [YOLOv8m](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8m-oiv7.pt) | 640 | 33.6 | 408.5 | 2.26 | 26.2 | 80.6 |
|
18 |
+
| [YOLOv8l](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8l-oiv7.pt) | 640 | 34.9 | 596.9 | 2.43 | 44.1 | 167.4 |
|
19 |
+
| [YOLOv8x](https://github.com/ultralytics/assets/releases/download/v8.1.0/yolov8x-oiv7.pt) | 640 | 36.3 | 860.6 | 3.56 | 68.7 | 260.6 |
|
20 |
+
|
21 |
+
![Open Images V7 classes visual](https://user-images.githubusercontent.com/26833433/258660358-2dc07771-ec08-4d11-b24a-f66e07550050.png)
|
22 |
+
|
23 |
+
## Key Features
|
24 |
+
|
25 |
+
- Encompasses ~9M images annotated in various ways to suit multiple computer vision tasks.
|
26 |
+
- Houses a staggering 16M bounding boxes across 600 object classes in 1.9M images. These boxes are primarily hand-drawn by experts ensuring high precision.
|
27 |
+
- Visual relationship annotations totaling 3.3M are available, detailing 1,466 unique relationship triplets, object properties, and human activities.
|
28 |
+
- V5 introduced segmentation masks for 2.8M objects across 350 classes.
|
29 |
+
- V6 introduced 675k localized narratives that amalgamate voice, text, and mouse traces highlighting described objects.
|
30 |
+
- V7 introduced 66.4M point-level labels on 1.4M images, spanning 5,827 classes.
|
31 |
+
- Encompasses 61.4M image-level labels across a diverse set of 20,638 classes.
|
32 |
+
- Provides a unified platform for image classification, object detection, relationship detection, instance segmentation, and multimodal image descriptions.
|
33 |
+
|
34 |
+
## Dataset Structure
|
35 |
+
|
36 |
+
Open Images V7 is structured in multiple components catering to varied computer vision challenges:
|
37 |
+
|
38 |
+
- **Images**: About 9 million images, often showcasing intricate scenes with an average of 8.3 objects per image.
|
39 |
+
- **Bounding Boxes**: Over 16 million boxes that demarcate objects across 600 categories.
|
40 |
+
- **Segmentation Masks**: These detail the exact boundary of 2.8M objects across 350 classes.
|
41 |
+
- **Visual Relationships**: 3.3M annotations indicating object relationships, properties, and actions.
|
42 |
+
- **Localized Narratives**: 675k descriptions combining voice, text, and mouse traces.
|
43 |
+
- **Point-Level Labels**: 66.4M labels across 1.4M images, suitable for zero/few-shot semantic segmentation.
|
44 |
+
|
45 |
+
## Applications
|
46 |
+
|
47 |
+
Open Images V7 is a cornerstone for training and evaluating state-of-the-art models in various computer vision tasks. The dataset's broad scope and high-quality annotations make it indispensable for researchers and developers specializing in computer vision.
|
48 |
+
|
49 |
+
## Dataset YAML
|
50 |
+
|
51 |
+
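Typically, datasets come with a YAML (YAML Ain't Markup Language) file that delineates the dataset's configuration, including its paths, classes, and other relevant information. For the case of Open Images V7, the `open-images-v7.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/open-images-v7.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/open-images-v7.yaml). For further details on the dataset itself, refer to the dataset's official repository or documentation.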
|
52 |
+
|
53 |
+
!!! Example "ultralytics/cfg/datasets/open-images-v7.yaml"
|
54 |
+
|
55 |
+
```yaml
|
56 |
+
--8<-- "ultralytics/cfg/datasets/open-images-v7.yaml"
|
57 |
+
```
|
58 |
+
|
59 |
+
## Usage
|
60 |
+
|
61 |
+
To train a YOLOv8n model on the Open Images V7 dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
62 |
+
|
63 |
+
!!! Warning
|
64 |
+
|
65 |
+
The complete Open Images V7 dataset comprises 1,743,042 training images and 41,620 validation images, requiring approximately **561 GB of storage space** upon download.
|
66 |
+
|
67 |
+
Executing the commands provided below will trigger an automatic download of the full dataset if it's not already present locally. Before running the example below, it's crucial to:
|
68 |
+
|
69 |
+
- Verify that your device has enough storage capacity.
|
70 |
+
- Ensure a robust and speedy internet connection.
|
71 |
+
|
72 |
+
!!! Example "Train Example"
|
73 |
+
|
74 |
+
=== "Python"
|
75 |
+
|
76 |
+
```python
|
77 |
+
from ultralytics import YOLO
|
78 |
+
|
79 |
+
# Load a COCO-pretrained YOLOv8n model
|
80 |
+
model = YOLO('yolov8n.pt')
|
81 |
+
|
82 |
+
# Train the model on the Open Images V7 dataset
|
83 |
+
results = model.train(data='open-images-v7.yaml', epochs=100, imgsz=640)
|
84 |
+
```
|
85 |
+
|
86 |
+
=== "CLI"
|
87 |
+
|
88 |
+
```bash
|
89 |
+
# Train a COCO-pretrained YOLOv8n model on the Open Images V7 dataset
|
90 |
+
yolo detect train data=open-images-v7.yaml model=yolov8n.pt epochs=100 imgsz=640
|
91 |
+
```
|
92 |
+
|
93 |
+
## Sample Data and Annotations
|
94 |
+
|
95 |
+
Illustrations of the dataset help provide insights into its richness:
|
96 |
+
|
97 |
+
![Dataset sample image](https://storage.googleapis.com/openimages/web/images/oidv7_all-in-one_example_ab.jpg)
|
98 |
+
|
99 |
+
- **Open Images V7**: This image exemplifies the depth and detail of annotations available, including bounding boxes, relationships, and segmentation masks.
|
100 |
+
|
101 |
+
Researchers can gain invaluable insights into the array of computer vision challenges that the dataset addresses, from basic object detection to intricate relationship identification.
|
102 |
+
|
103 |
+
## Citations and Acknowledgments
|
104 |
+
|
105 |
+
For those employing Open Images V7 in their work, it's prudent to cite the relevant papers and acknowledge the creators:
|
106 |
+
|
107 |
+
!!! Quote ""
|
108 |
+
|
109 |
+
=== "BibTeX"
|
110 |
+
|
111 |
+
```bibtex
|
112 |
+
@article{OpenImages,
|
113 |
+
author = {Alina Kuznetsova and Hassan Rom and Neil Alldrin and Jasper Uijlings and Ivan Krasin and Jordi Pont-Tuset and Shahab Kamali and Stefan Popov and Matteo Malloci and Alexander Kolesnikov and Tom Duerig and Vittorio Ferrari},
|
114 |
+
title = {The Open Images Dataset V4: Unified image classification, object detection, and visual relationship detection at scale},
|
115 |
+
year = {2020},
|
116 |
+
journal = {IJCV}
|
117 |
+
}
|
118 |
+
```
|
119 |
+
|
120 |
+
A heartfelt acknowledgment goes out to the Google AI team for creating and maintaining the Open Images V7 dataset. For a deep dive into the dataset and its offerings, navigate to the [official Open Images V7 website](https://storage.googleapis.com/openimages/web/index.html).
|
docs/en/datasets/detect/roboflow-100.md
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Get to know Roboflow 100, a comprehensive object detection benchmark that brings together 100 datasets from different domains.
|
4 |
+
keywords: Ultralytics, YOLOv8, YOLO models, Roboflow 100, object detection, benchmark, computer vision, datasets, deep learning models
|
5 |
+
---
|
6 |
+
|
7 |
+
# Roboflow 100 Dataset
|
8 |
+
|
9 |
+
Roboflow 100, developed by [Roboflow](https://roboflow.com/?ref=ultralytics) and sponsored by Intel, is a groundbreaking [object detection](../../tasks/detect.md) benchmark. It includes 100 diverse datasets sampled from over 90,000 public datasets. This benchmark is designed to test the adaptability of models to various domains, including healthcare, aerial imagery, and video games.
|
10 |
+
|
11 |
+
<p align="center">
|
12 |
+
<img width="640" src="https://user-images.githubusercontent.com/15908060/202452898-9ca6b8f7-4805-4e8e-949a-6e080d7b94d2.jpg" alt="Roboflow 100 Overview">
|
13 |
+
</p>
|
14 |
+
|
15 |
+
## Key Features
|
16 |
+
|
17 |
+
- Includes 100 datasets across seven domains: Aerial, Video games, Microscopic, Underwater, Documents, Electromagnetic, and Real World.
|
18 |
+
- The benchmark comprises 224,714 images across 805 classes, thanks to over 11,170 hours of labeling efforts.
|
19 |
+
- All images are resized to 640x640 pixels, with a focus on eliminating class ambiguity and filtering out underrepresented classes.
|
20 |
+
- Annotations include bounding boxes for objects, making it suitable for [training](../../modes/train.md) and evaluating object detection models.
|
21 |
+
|
22 |
+
## Dataset Structure
|
23 |
+
|
24 |
+
The Roboflow 100 dataset is organized into seven categories, each with a distinct set of datasets, images, and classes:
|
25 |
+
|
26 |
+
- **Aerial**: Consists of 7 datasets with a total of 9,683 images, covering 24 distinct classes.
|
27 |
+
- **Video Games**: Includes 7 datasets, featuring 11,579 images across 88 classes.
|
28 |
+
- **Microscopic**: Comprises 11 datasets with 13,378 images, spanning 28 classes.
|
29 |
+
- **Underwater**: Contains 5 datasets, encompassing 18,003 images in 39 classes.
|
30 |
+
- **Documents**: Consists of 8 datasets with 24,813 images, divided into 90 classes.
|
31 |
+
- **Electromagnetic**: Made up of 12 datasets, totaling 36,381 images in 41 classes.
|
32 |
+
- **Real World**: The largest category with 50 datasets, offering 110,615 images across 495 classes.
|
33 |
+
|
34 |
+
This structure enables a diverse and extensive testing ground for object detection models, reflecting real-world application scenarios.
|
35 |
+
|
36 |
+
## Applications
|
37 |
+
|
38 |
+
Roboflow 100 is invaluable for various applications related to computer vision and deep learning. Researchers and engineers can use this benchmark to:
|
39 |
+
|
40 |
+
- Evaluate the performance of object detection models in a multi-domain context.
|
41 |
+
- Test the adaptability of models to real-world scenarios beyond common object recognition.
|
42 |
+
- Benchmark the capabilities of object detection models across diverse datasets, including those in healthcare, aerial imagery, and video games.
|
43 |
+
|
44 |
+
For more ideas and inspiration on real-world applications, be sure to check out [our guides on real-world projects](../../guides/index.md).
|
45 |
+
|
46 |
+
## Usage
|
47 |
+
|
48 |
+
The Roboflow 100 dataset is available on both [GitHub](https://github.com/roboflow/roboflow-100-benchmark) and [Roboflow Universe](https://universe.roboflow.com/roboflow-100).
|
49 |
+
|
50 |
+
You can access it directly from the Roboflow 100 GitHub repository. In addition, on Roboflow Universe, you have the flexibility to download individual datasets by simply clicking the export button within each dataset.
|
51 |
+
|
52 |
+
## Sample Data and Annotations
|
53 |
+
|
54 |
+
Roboflow 100 consists of datasets with diverse images and videos captured from various angles and domains. Here’s a look at examples of annotated images in the RF100 benchmark.
|
55 |
+
|
56 |
+
<p align="center">
|
57 |
+
<img width="640" src="https://blog.roboflow.com/content/images/2022/11/image-2.png" alt="Sample Data and Annotations">
|
58 |
+
</p>
|
59 |
+
|
60 |
+
The diversity in the Roboflow 100 benchmark that can be seen above is a significant advancement over traditional benchmarks, which often focus on optimizing a single metric within a limited domain.
|
61 |
+
|
62 |
+
## Citations and Acknowledgments
|
63 |
+
|
64 |
+
If you use the Roboflow 100 dataset in your research or development work, please cite the following paper:
|
65 |
+
|
66 |
+
!!! Quote ""
|
67 |
+
|
68 |
+
=== "BibTeX"
|
69 |
+
|
70 |
+
```bibtex
|
71 |
+
@misc{2211.13523,
|
72 |
+
Author = {Floriana Ciaglia and Francesco Saverio Zuppichini and Paul Guerrie and Mark McQuade and Jacob Solawetz},
|
73 |
+
Title = {Roboflow 100: A Rich, Multi-Domain Object Detection Benchmark},
|
74 |
+
Eprint = {arXiv:2211.13523},
|
75 |
+
}
|
76 |
+
```
|
77 |
+
|
78 |
+
Our thanks go to the Roboflow team and all the contributors for their hard work in creating and sustaining the Roboflow 100 dataset.
|
79 |
+
|
80 |
+
If you are interested in exploring more datasets to enhance your object detection and machine learning projects, feel free to visit [our comprehensive dataset collection](../index.md).
|
docs/en/datasets/detect/sku-110k.md
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore the SKU-110k dataset of densely packed retail shelf images for object detection research. Learn how to use it with Ultralytics.
|
4 |
+
keywords: SKU-110k dataset, object detection, retail shelf images, Ultralytics, YOLO, computer vision, deep learning models
|
5 |
+
---
|
6 |
+
|
7 |
+
# SKU-110k Dataset
|
8 |
+
|
9 |
+
The [SKU-110k](https://github.com/eg4000/SKU110K_CVPR19) dataset is a collection of densely packed retail shelf images, designed to support research in object detection tasks. Developed by Eran Goldman et al., the dataset contains over 110,000 unique stock keeping unit (SKU) categories with densely packed objects, often looking similar or even identical, positioned in close proximity.
|
10 |
+
|
11 |
+
![Dataset sample image](https://user-images.githubusercontent.com/26833433/277141199-e7cdd803-237e-4b4a-9171-f95cba9388f9.jpg)
|
12 |
+
|
13 |
+
## Key Features
|
14 |
+
|
15 |
+
- SKU-110k contains images of store shelves from around the world, featuring densely packed objects that pose challenges for state-of-the-art object detectors.
|
16 |
+
- The dataset includes over 110,000 unique SKU categories, providing a diverse range of object appearances.
|
17 |
+
- Annotations include bounding boxes for objects and SKU category labels.
|
18 |
+
|
19 |
+
## Dataset Structure
|
20 |
+
|
21 |
+
The SKU-110k dataset is organized into three main subsets:
|
22 |
+
|
23 |
+
1. **Training set**: This subset contains images and annotations used for training object detection models.
|
24 |
+
2. **Validation set**: This subset consists of images and annotations used for model validation during training.
|
25 |
+
3. **Test set**: This subset is designed for the final evaluation of trained object detection models.
|
26 |
+
|
27 |
+
## Applications
|
28 |
+
|
29 |
+
The SKU-110k dataset is widely used for training and evaluating deep learning models in object detection tasks, especially in densely packed scenes such as retail shelf displays. The dataset's diverse set of SKU categories and densely packed object arrangements make it a valuable resource for researchers and practitioners in the field of computer vision.
|
30 |
+
|
31 |
+
## Dataset YAML
|
32 |
+
|
33 |
+
A YAML (YAML Ain't Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. For the case of the SKU-110K dataset, the `SKU-110K.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/SKU-110K.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/SKU-110K.yaml).
|
34 |
+
|
35 |
+
!!! Example "ultralytics/cfg/datasets/SKU-110K.yaml"
|
36 |
+
|
37 |
+
```yaml
|
38 |
+
--8<-- "ultralytics/cfg/datasets/SKU-110K.yaml"
|
39 |
+
```
|
40 |
+
|
41 |
+
## Usage
|
42 |
+
|
43 |
+
To train a YOLOv8n model on the SKU-110K dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
44 |
+
|
45 |
+
!!! Example "Train Example"
|
46 |
+
|
47 |
+
=== "Python"
|
48 |
+
|
49 |
+
```python
|
50 |
+
from ultralytics import YOLO
|
51 |
+
|
52 |
+
# Load a model
|
53 |
+
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
|
54 |
+
|
55 |
+
# Train the model
|
56 |
+
results = model.train(data='SKU-110K.yaml', epochs=100, imgsz=640)
|
57 |
+
```
|
58 |
+
|
59 |
+
=== "CLI"
|
60 |
+
|
61 |
+
```bash
|
62 |
+
# Start training from a pretrained *.pt model
|
63 |
+
yolo detect train data=SKU-110K.yaml model=yolov8n.pt epochs=100 imgsz=640
|
64 |
+
```
|
65 |
+
|
66 |
+
## Sample Data and Annotations
|
67 |
+
|
68 |
+
The SKU-110k dataset contains a diverse set of retail shelf images with densely packed objects, providing rich context for object detection tasks. Here are some examples of data from the dataset, along with their corresponding annotations:
|
69 |
+
|
70 |
+
![Dataset sample image](https://user-images.githubusercontent.com/26833433/277141197-b63e4aa5-12f6-4673-96a7-9a5207363c59.jpg)
|
71 |
+
|
72 |
+
- **Densely packed retail shelf image**: This image demonstrates an example of densely packed objects in a retail shelf setting. Objects are annotated with bounding boxes and SKU category labels.
|
73 |
+
|
74 |
+
The example showcases the variety and complexity of the data in the SKU-110k dataset and highlights the importance of high-quality data for object detection tasks.
|
75 |
+
|
76 |
+
## Citations and Acknowledgments
|
77 |
+
|
78 |
+
If you use the SKU-110k dataset in your research or development work, please cite the following paper:
|
79 |
+
|
80 |
+
!!! Quote ""
|
81 |
+
|
82 |
+
=== "BibTeX"
|
83 |
+
|
84 |
+
```bibtex
|
85 |
+
@inproceedings{goldman2019dense,
|
86 |
+
author = {Eran Goldman and Roei Herzig and Aviv Eisenschtat and Jacob Goldberger and Tal Hassner},
|
87 |
+
title = {Precise Detection in Densely Packed Scenes},
|
88 |
+
booktitle = {Proc. Conf. Comput. Vision Pattern Recognition (CVPR)},
|
89 |
+
year = {2019}
|
90 |
+
}
|
91 |
+
```
|
92 |
+
|
93 |
+
We would like to acknowledge Eran Goldman et al. for creating and maintaining the SKU-110k dataset as a valuable resource for the computer vision research community. For more information about the SKU-110k dataset and its creators, visit the [SKU-110k dataset GitHub repository](https://github.com/eg4000/SKU110K_CVPR19).
|
docs/en/datasets/detect/visdrone.md
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore the VisDrone Dataset, a large-scale benchmark for drone-based image analysis, and learn how to train a YOLO model using it.
|
4 |
+
keywords: VisDrone Dataset, Ultralytics, drone-based image analysis, YOLO model, object detection, object tracking, crowd counting
|
5 |
+
---
|
6 |
+
|
7 |
+
# VisDrone Dataset
|
8 |
+
|
9 |
+
The [VisDrone Dataset](https://github.com/VisDrone/VisDrone-Dataset) is a large-scale benchmark created by the AISKYEYE team at the Lab of Machine Learning and Data Mining, Tianjin University, China. It contains carefully annotated ground truth data for various computer vision tasks related to drone-based image and video analysis.
|
10 |
+
|
11 |
+
VisDrone is composed of 288 video clips with 261,908 frames and 10,209 static images, captured by various drone-mounted cameras. The dataset covers a wide range of aspects, including location (14 different cities across China), environment (urban and rural), objects (pedestrians, vehicles, bicycles, etc.), and density (sparse and crowded scenes). The dataset was collected using various drone platforms under different scenarios and weather and lighting conditions. These frames are manually annotated with over 2.6 million bounding boxes of targets such as pedestrians, cars, bicycles, and tricycles. Attributes like scene visibility, object class, and occlusion are also provided for better data utilization.
|
12 |
+
|
13 |
+
## Dataset Structure
|
14 |
+
|
15 |
+
The VisDrone dataset is organized into five main subsets, each focusing on a specific task:
|
16 |
+
|
17 |
+
1. **Task 1**: Object detection in images
|
18 |
+
2. **Task 2**: Object detection in videos
|
19 |
+
3. **Task 3**: Single-object tracking
|
20 |
+
4. **Task 4**: Multi-object tracking
|
21 |
+
5. **Task 5**: Crowd counting
|
22 |
+
|
23 |
+
## Applications
|
24 |
+
|
25 |
+
The VisDrone dataset is widely used for training and evaluating deep learning models in drone-based computer vision tasks such as object detection, object tracking, and crowd counting. The dataset's diverse set of sensor data, object annotations, and attributes make it a valuable resource for researchers and practitioners in the field of drone-based computer vision.
|
26 |
+
|
27 |
+
## Dataset YAML
|
28 |
+
|
29 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the VisDrone dataset, the `VisDrone.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/VisDrone.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/VisDrone.yaml).
|
30 |
+
|
31 |
+
!!! Example "ultralytics/cfg/datasets/VisDrone.yaml"
|
32 |
+
|
33 |
+
```yaml
|
34 |
+
--8<-- "ultralytics/cfg/datasets/VisDrone.yaml"
|
35 |
+
```
|
36 |
+
|
37 |
+
## Usage
|
38 |
+
|
39 |
+
To train a YOLOv8n model on the VisDrone dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
40 |
+
|
41 |
+
!!! Example "Train Example"
|
42 |
+
|
43 |
+
=== "Python"
|
44 |
+
|
45 |
+
```python
|
46 |
+
from ultralytics import YOLO
|
47 |
+
|
48 |
+
# Load a model
|
49 |
+
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
|
50 |
+
|
51 |
+
# Train the model
|
52 |
+
results = model.train(data='VisDrone.yaml', epochs=100, imgsz=640)
|
53 |
+
```
|
54 |
+
|
55 |
+
=== "CLI"
|
56 |
+
|
57 |
+
```bash
|
58 |
+
# Start training from a pretrained *.pt model
|
59 |
+
yolo detect train data=VisDrone.yaml model=yolov8n.pt epochs=100 imgsz=640
|
60 |
+
```
|
61 |
+
|
62 |
+
## Sample Data and Annotations
|
63 |
+
|
64 |
+
The VisDrone dataset contains a diverse set of images and videos captured by drone-mounted cameras. Here are some examples of data from the dataset, along with their corresponding annotations:
|
65 |
+
|
66 |
+
![Dataset sample image](https://user-images.githubusercontent.com/26833433/238217600-df0b7334-4c9e-4c77-81a5-c70cd33429cc.jpg)
|
67 |
+
|
68 |
+
- **Task 1**: Object detection in images - This image demonstrates an example of object detection in images, where objects are annotated with bounding boxes. The dataset provides a wide variety of images taken from different locations, environments, and densities to facilitate the development of models for this task.
|
69 |
+
|
70 |
+
The example showcases the variety and complexity of the data in the VisDrone dataset and highlights the importance of high-quality sensor data for drone-based computer vision tasks.
|
71 |
+
|
72 |
+
## Citations and Acknowledgments
|
73 |
+
|
74 |
+
If you use the VisDrone dataset in your research or development work, please cite the following paper:
|
75 |
+
|
76 |
+
!!! Quote ""
|
77 |
+
|
78 |
+
=== "BibTeX"
|
79 |
+
|
80 |
+
```bibtex
|
81 |
+
@ARTICLE{9573394,
|
82 |
+
author={Zhu, Pengfei and Wen, Longyin and Du, Dawei and Bian, Xiao and Fan, Heng and Hu, Qinghua and Ling, Haibin},
|
83 |
+
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
|
84 |
+
title={Detection and Tracking Meet Drones Challenge},
|
85 |
+
year={2021},
|
86 |
+
volume={},
|
87 |
+
number={},
|
88 |
+
pages={1-1},
|
89 |
+
doi={10.1109/TPAMI.2021.3119563}}
|
90 |
+
```
|
91 |
+
|
92 |
+
We would like to acknowledge the AISKYEYE team at the Lab of Machine Learning and Data Mining, Tianjin University, China, for creating and maintaining the VisDrone dataset as a valuable resource for the drone-based computer vision research community. For more information about the VisDrone dataset and its creators, visit the [VisDrone Dataset GitHub repository](https://github.com/VisDrone/VisDrone-Dataset).
|
docs/en/datasets/detect/voc.md
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: A complete guide to the PASCAL VOC dataset used for object detection, segmentation and classification tasks with relevance to YOLO model training.
|
4 |
+
keywords: Ultralytics, PASCAL VOC dataset, object detection, segmentation, image classification, YOLO, model training, VOC.yaml, deep learning
|
5 |
+
---
|
6 |
+
|
7 |
+
# VOC Dataset
|
8 |
+
|
9 |
+
The [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/) (Visual Object Classes) dataset is a well-known object detection, segmentation, and classification dataset. It is designed to encourage research on a wide variety of object categories and is commonly used for benchmarking computer vision models. It is an essential dataset for researchers and developers working on object detection, segmentation, and classification tasks.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- VOC dataset includes two main challenges: VOC2007 and VOC2012.
|
14 |
+
- The dataset comprises 20 object categories, including common objects like cars, bicycles, and animals, as well as more specific categories such as boats, sofas, and dining tables.
|
15 |
+
- Annotations include object bounding boxes and class labels for object detection and classification tasks, and segmentation masks for the segmentation tasks.
|
16 |
+
- VOC provides standardized evaluation metrics like mean Average Precision (mAP) for object detection and classification, making it suitable for comparing model performance.
|
17 |
+
|
18 |
+
## Dataset Structure
|
19 |
+
|
20 |
+
The VOC dataset is split into three subsets:
|
21 |
+
|
22 |
+
1. **Train**: This subset contains images for training object detection, segmentation, and classification models.
|
23 |
+
2. **Validation**: This subset has images used for validation purposes during model training.
|
24 |
+
3. **Test**: This subset consists of images used for testing and benchmarking the trained models. Ground truth annotations for this subset are not publicly available, and the results are submitted to the [PASCAL VOC evaluation server](http://host.robots.ox.ac.uk:8080/leaderboard/displaylb.php) for performance evaluation.
|
25 |
+
|
26 |
+
## Applications
|
27 |
+
|
28 |
+
The VOC dataset is widely used for training and evaluating deep learning models in object detection (such as YOLO, Faster R-CNN, and SSD), instance segmentation (such as Mask R-CNN), and image classification. The dataset's diverse set of object categories, large number of annotated images, and standardized evaluation metrics make it an essential resource for computer vision researchers and practitioners.
|
29 |
+
|
30 |
+
## Dataset YAML
|
31 |
+
|
32 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the VOC dataset, the `VOC.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/VOC.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/VOC.yaml).
|
33 |
+
|
34 |
+
!!! Example "ultralytics/cfg/datasets/VOC.yaml"
|
35 |
+
|
36 |
+
```yaml
|
37 |
+
--8<-- "ultralytics/cfg/datasets/VOC.yaml"
|
38 |
+
```
|
39 |
+
|
40 |
+
## Usage
|
41 |
+
|
42 |
+
To train a YOLOv8n model on the VOC dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
43 |
+
|
44 |
+
!!! Example "Train Example"
|
45 |
+
|
46 |
+
=== "Python"
|
47 |
+
|
48 |
+
```python
|
49 |
+
from ultralytics import YOLO
|
50 |
+
|
51 |
+
# Load a model
|
52 |
+
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
|
53 |
+
|
54 |
+
# Train the model
|
55 |
+
results = model.train(data='VOC.yaml', epochs=100, imgsz=640)
|
56 |
+
```
|
57 |
+
|
58 |
+
=== "CLI"
|
59 |
+
|
60 |
+
```bash
|
61 |
+
# Start training from a pretrained *.pt model
|
62 |
+
yolo detect train data=VOC.yaml model=yolov8n.pt epochs=100 imgsz=640
|
63 |
+
```
|
64 |
+
|
65 |
+
## Sample Images and Annotations
|
66 |
+
|
67 |
+
The VOC dataset contains a diverse set of images with various object categories and complex scenes. Here are some examples of images from the dataset, along with their corresponding annotations:
|
68 |
+
|
69 |
+
![Dataset sample image](https://github.com/ultralytics/ultralytics/assets/26833433/7d4c18f4-774e-43f8-a5f3-9467cda7de4a)
|
70 |
+
|
71 |
+
- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts.
|
72 |
+
|
73 |
+
The example showcases the variety and complexity of the images in the VOC dataset and the benefits of using mosaicing during the training process.
|
74 |
+
|
75 |
+
## Citations and Acknowledgments
|
76 |
+
|
77 |
+
If you use the VOC dataset in your research or development work, please cite the following paper:
|
78 |
+
|
79 |
+
!!! Quote ""
|
80 |
+
|
81 |
+
=== "BibTeX"
|
82 |
+
|
83 |
+
```bibtex
|
84 |
+
@misc{everingham2010pascal,
|
85 |
+
title={The PASCAL Visual Object Classes (VOC) Challenge},
|
86 |
+
author={Mark Everingham and Luc Van Gool and Christopher K. I. Williams and John Winn and Andrew Zisserman},
|
87 |
+
year={2010},
|
88 |
+
eprint={0909.5206},
|
89 |
+
archivePrefix={arXiv},
|
90 |
+
primaryClass={cs.CV}
|
91 |
+
}
|
92 |
+
```
|
93 |
+
|
94 |
+
We would like to acknowledge the PASCAL VOC Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the VOC dataset and its creators, visit the [PASCAL VOC dataset website](http://host.robots.ox.ac.uk/pascal/VOC/).
|
docs/en/datasets/detect/xview.md
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore xView, a large-scale, high resolution satellite imagery dataset for object detection. Dive into dataset structure, usage examples & its potential applications.
|
4 |
+
keywords: Ultralytics, YOLO, computer vision, xView dataset, satellite imagery, object detection, overhead imagery, training, deep learning, dataset YAML
|
5 |
+
---
|
6 |
+
|
7 |
+
# xView Dataset
|
8 |
+
|
9 |
+
The [xView](http://xviewdataset.org/) dataset is one of the largest publicly available datasets of overhead imagery, containing images from complex scenes around the world annotated using bounding boxes. The goal of the xView dataset is to accelerate progress in four computer vision frontiers:
|
10 |
+
|
11 |
+
1. Reduce minimum resolution for detection.
|
12 |
+
2. Improve learning efficiency.
|
13 |
+
3. Enable discovery of more object classes.
|
14 |
+
4. Improve detection of fine-grained classes.
|
15 |
+
|
16 |
+
xView builds on the success of challenges like Common Objects in Context (COCO) and aims to leverage computer vision to analyze the growing amount of available imagery from space in order to understand the visual world in new ways and address a range of important applications.
|
17 |
+
|
18 |
+
## Key Features
|
19 |
+
|
20 |
+
- xView contains over 1 million object instances across 60 classes.
|
21 |
+
- The dataset has a resolution of 0.3 meters, providing higher resolution imagery than most public satellite imagery datasets.
|
22 |
+
- xView features a diverse collection of small, rare, fine-grained, and multi-type objects with bounding box annotation.
|
23 |
+
- Comes with a pre-trained baseline model using the TensorFlow object detection API and an example for PyTorch.
|
24 |
+
|
25 |
+
## Dataset Structure
|
26 |
+
|
27 |
+
The xView dataset is composed of satellite images collected from WorldView-3 satellites at a 0.3m ground sample distance. It contains over 1 million objects across 60 classes in over 1,400 km² of imagery.
|
28 |
+
|
29 |
+
## Applications
|
30 |
+
|
31 |
+
The xView dataset is widely used for training and evaluating deep learning models for object detection in overhead imagery. The dataset's diverse set of object classes and high-resolution imagery make it a valuable resource for researchers and practitioners in the field of computer vision, especially for satellite imagery analysis.
|
32 |
+
|
33 |
+
## Dataset YAML
|
34 |
+
|
35 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the xView dataset, the `xView.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/xView.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/xView.yaml).
|
36 |
+
|
37 |
+
!!! Example "ultralytics/cfg/datasets/xView.yaml"
|
38 |
+
|
39 |
+
```yaml
|
40 |
+
--8<-- "ultralytics/cfg/datasets/xView.yaml"
|
41 |
+
```
|
42 |
+
|
43 |
+
## Usage
|
44 |
+
|
45 |
+
To train a model on the xView dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
46 |
+
|
47 |
+
!!! Example "Train Example"
|
48 |
+
|
49 |
+
=== "Python"
|
50 |
+
|
51 |
+
```python
|
52 |
+
from ultralytics import YOLO
|
53 |
+
|
54 |
+
# Load a model
|
55 |
+
model = YOLO('yolov8n.pt') # load a pretrained model (recommended for training)
|
56 |
+
|
57 |
+
# Train the model
|
58 |
+
results = model.train(data='xView.yaml', epochs=100, imgsz=640)
|
59 |
+
```
|
60 |
+
|
61 |
+
=== "CLI"
|
62 |
+
|
63 |
+
```bash
|
64 |
+
# Start training from a pretrained *.pt model
|
65 |
+
yolo detect train data=xView.yaml model=yolov8n.pt epochs=100 imgsz=640
|
66 |
+
```
|
67 |
+
|
68 |
+
## Sample Data and Annotations
|
69 |
+
|
70 |
+
The xView dataset contains high-resolution satellite images with a diverse set of objects annotated using bounding boxes. Here are some examples of data from the dataset, along with their corresponding annotations:
|
71 |
+
|
72 |
+
![Dataset sample image](https://user-images.githubusercontent.com/26833433/277141257-ae6ba4de-5dcb-4c76-bc05-bc1e386361ba.jpg)
|
73 |
+
|
74 |
+
- **Overhead Imagery**: This image demonstrates an example of object detection in overhead imagery, where objects are annotated with bounding boxes. The dataset provides high-resolution satellite images to facilitate the development of models for this task.
|
75 |
+
|
76 |
+
The example showcases the variety and complexity of the data in the xView dataset and highlights the importance of high-quality satellite imagery for object detection tasks.
|
77 |
+
|
78 |
+
## Citations and Acknowledgments
|
79 |
+
|
80 |
+
If you use the xView dataset in your research or development work, please cite the following paper:
|
81 |
+
|
82 |
+
!!! Quote ""
|
83 |
+
|
84 |
+
=== "BibTeX"
|
85 |
+
|
86 |
+
```bibtex
|
87 |
+
@misc{lam2018xview,
|
88 |
+
title={xView: Objects in Context in Overhead Imagery},
|
89 |
+
author={Darius Lam and Richard Kuzma and Kevin McGee and Samuel Dooley and Michael Laielli and Matthew Klaric and Yaroslav Bulatov and Brendan McCord},
|
90 |
+
year={2018},
|
91 |
+
eprint={1802.07856},
|
92 |
+
archivePrefix={arXiv},
|
93 |
+
primaryClass={cs.CV}
|
94 |
+
}
|
95 |
+
```
|
96 |
+
|
97 |
+
We would like to acknowledge the [Defense Innovation Unit](https://www.diu.mil/) (DIU) and the creators of the xView dataset for their valuable contribution to the computer vision research community. For more information about the xView dataset and its creators, visit the [xView dataset website](http://xviewdataset.org/).
|
docs/en/datasets/explorer/api.md
ADDED
@@ -0,0 +1,337 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore and analyze CV datasets with Ultralytics Explorer API, offering SQL, vector similarity, and semantic searches for efficient dataset insights.
|
4 |
+
keywords: Ultralytics Explorer API, Dataset Exploration, SQL Queries, Vector Similarity Search, Semantic Search, Embeddings Table, Image Similarity, Python API for Datasets, CV Dataset Analysis, LanceDB Integration
|
5 |
+
---
|
6 |
+
|
7 |
+
# Ultralytics Explorer API
|
8 |
+
|
9 |
+
## Introduction
|
10 |
+
|
11 |
+
<a href="https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/docs/en/datasets/explorer/explorer.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
|
12 |
+
The Explorer API is a Python API for exploring your datasets. It supports filtering and searching your dataset using SQL queries, vector similarity search and semantic search.
|
13 |
+
|
14 |
+
<p align="center">
|
15 |
+
<br>
|
16 |
+
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/3VryynorQeo?start=279"
|
17 |
+
title="YouTube video player" frameborder="0"
|
18 |
+
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
|
19 |
+
allowfullscreen>
|
20 |
+
</iframe>
|
21 |
+
<br>
|
22 |
+
<strong>Watch:</strong> Ultralytics Explorer API Overview
|
23 |
+
</p>
|
24 |
+
|
25 |
+
## Installation
|
26 |
+
|
27 |
+
Explorer depends on external libraries for some of its functionality. These are automatically installed on usage. To manually install these dependencies, use the following command:
|
28 |
+
|
29 |
+
```bash
|
30 |
+
pip install ultralytics[explorer]
|
31 |
+
```
|
32 |
+
|
33 |
+
## Usage
|
34 |
+
|
35 |
+
```python
|
36 |
+
from ultralytics import Explorer
|
37 |
+
|
38 |
+
# Create an Explorer object
|
39 |
+
explorer = Explorer(data='coco128.yaml', model='yolov8n.pt')
|
40 |
+
|
41 |
+
# Create embeddings for your dataset
|
42 |
+
explorer.create_embeddings_table()
|
43 |
+
|
44 |
+
# Search for similar images to a given image/images
|
45 |
+
dataframe = explorer.get_similar(img='path/to/image.jpg')
|
46 |
+
|
47 |
+
# Or search for similar images to a given index/indices
|
48 |
+
dataframe = explorer.get_similar(idx=0)
|
49 |
+
```
|
50 |
+
|
51 |
+
!!! Tip "Note"
|
52 |
+
|
53 |
+
The embeddings table for a given dataset and model pair is created only once and then reused. It uses [LanceDB](https://lancedb.github.io/lancedb/) under the hood, which scales on-disk, so you can create and reuse embeddings for large datasets like COCO without running out of memory.
|
54 |
+
|
55 |
+
In case you want to force update the embeddings table, you can pass `force=True` to the `create_embeddings_table` method.
|
56 |
+
You can directly access the LanceDB table object to perform advanced analysis. Learn more about it in the [Working with Embeddings Table](#4-advanced---working-with-embeddings-table) section.
|
57 |
+
|
58 |
+
## 1. Similarity Search
|
59 |
+
|
60 |
+
Similarity search is a technique for finding similar images to a given image. It is based on the idea that similar images will have similar embeddings. Once the embeddings table is built, you can run semantic search in any of the following ways:
|
61 |
+
|
62 |
+
- On a given index or list of indices in the dataset: `exp.get_similar(idx=[1,10], limit=10)`
|
63 |
+
- On any image or list of images not in the dataset: `exp.get_similar(img=["path/to/img1", "path/to/img2"], limit=10)`
|
64 |
+
|
65 |
+
In case of multiple inputs, the aggregate of their embeddings is used.
|
66 |
+
|
67 |
+
You get a pandas dataframe with the `limit` number of most similar data points to the input, along with their distance in the embedding space. You can use this dataframe to perform further filtering.
|
68 |
+
|
69 |
+
!!! Example "Semantic Search"
|
70 |
+
|
71 |
+
=== "Using Images"
|
72 |
+
|
73 |
+
```python
|
74 |
+
from ultralytics import Explorer
|
75 |
+
|
76 |
+
# create an Explorer object
|
77 |
+
exp = Explorer(data='coco128.yaml', model='yolov8n.pt')
|
78 |
+
exp.create_embeddings_table()
|
79 |
+
|
80 |
+
similar = exp.get_similar(img='https://ultralytics.com/images/bus.jpg', limit=10)
|
81 |
+
print(similar.head())
|
82 |
+
|
83 |
+
# Search using multiple images
|
84 |
+
similar = exp.get_similar(
|
85 |
+
img=['https://ultralytics.com/images/bus.jpg',
|
86 |
+
'https://ultralytics.com/images/bus.jpg'],
|
87 |
+
limit=10
|
88 |
+
)
|
89 |
+
print(similar.head())
|
90 |
+
```
|
91 |
+
|
92 |
+
=== "Using Dataset Indices"
|
93 |
+
|
94 |
+
```python
|
95 |
+
from ultralytics import Explorer
|
96 |
+
|
97 |
+
# create an Explorer object
|
98 |
+
exp = Explorer(data='coco128.yaml', model='yolov8n.pt')
|
99 |
+
exp.create_embeddings_table()
|
100 |
+
|
101 |
+
similar = exp.get_similar(idx=1, limit=10)
|
102 |
+
print(similar.head())
|
103 |
+
|
104 |
+
# Search using multiple indices
|
105 |
+
similar = exp.get_similar(idx=[1,10], limit=10)
|
106 |
+
print(similar.head())
|
107 |
+
```
|
108 |
+
|
109 |
+
### Plotting Similar Images
|
110 |
+
|
111 |
+
You can also plot the similar images using the `plot_similar` method. This method takes the same arguments as `get_similar` and plots the similar images in a grid.
|
112 |
+
|
113 |
+
!!! Example "Plotting Similar Images"
|
114 |
+
|
115 |
+
=== "Using Images"
|
116 |
+
|
117 |
+
```python
|
118 |
+
from ultralytics import Explorer
|
119 |
+
|
120 |
+
# create an Explorer object
|
121 |
+
exp = Explorer(data='coco128.yaml', model='yolov8n.pt')
|
122 |
+
exp.create_embeddings_table()
|
123 |
+
|
124 |
+
plt = exp.plot_similar(img='https://ultralytics.com/images/bus.jpg', limit=10)
|
125 |
+
plt.show()
|
126 |
+
```
|
127 |
+
|
128 |
+
=== "Using Dataset Indices"
|
129 |
+
|
130 |
+
```python
|
131 |
+
from ultralytics import Explorer
|
132 |
+
|
133 |
+
# create an Explorer object
|
134 |
+
exp = Explorer(data='coco128.yaml', model='yolov8n.pt')
|
135 |
+
exp.create_embeddings_table()
|
136 |
+
|
137 |
+
plt = exp.plot_similar(idx=1, limit=10)
|
138 |
+
plt.show()
|
139 |
+
```
|
140 |
+
|
141 |
+
## 2. Ask AI (Natural Language Querying)
|
142 |
+
|
143 |
+
This allows you to write how you want to filter your dataset using natural language. You don't have to be proficient in writing SQL queries. Our AI powered query generator will automatically do that under the hood. For example - you can say - "show me 100 images with exactly one person and 2 dogs. There can be other objects too" and it'll internally generate the query and show you those results.
|
144 |
+
Note: This works using LLMs under the hood, so the results are probabilistic and may sometimes be wrong.
|
145 |
+
|
146 |
+
!!! Example "Ask AI"
|
147 |
+
|
148 |
+
```python
|
149 |
+
from ultralytics import Explorer
|
150 |
+
from ultralytics.data.explorer import plot_query_result
|
151 |
+
|
152 |
+
|
153 |
+
# create an Explorer object
|
154 |
+
exp = Explorer(data='coco128.yaml', model='yolov8n.pt')
|
155 |
+
exp.create_embeddings_table()
|
156 |
+
|
157 |
+
df = exp.ask_ai("show me 100 images with exactly one person and 2 dogs. There can be other objects too")
|
158 |
+
print(df.head())
|
159 |
+
|
160 |
+
# plot the results
|
161 |
+
plt = plot_query_result(df)
|
162 |
+
plt.show()
|
163 |
+
```
|
164 |
+
|
165 |
+
## 3. SQL Querying
|
166 |
+
|
167 |
+
You can run SQL queries on your dataset using the `sql_query` method. This method takes a SQL query as input and returns a pandas dataframe with the results.
|
168 |
+
|
169 |
+
!!! Example "SQL Query"
|
170 |
+
|
171 |
+
```python
|
172 |
+
from ultralytics import Explorer
|
173 |
+
|
174 |
+
# create an Explorer object
|
175 |
+
exp = Explorer(data='coco128.yaml', model='yolov8n.pt')
|
176 |
+
exp.create_embeddings_table()
|
177 |
+
|
178 |
+
df = exp.sql_query("WHERE labels LIKE '%person%' AND labels LIKE '%dog%'")
|
179 |
+
print(df.head())
|
180 |
+
```
|
181 |
+
|
182 |
+
### Plotting SQL Query Results
|
183 |
+
|
184 |
+
You can also plot the results of a SQL query using the `plot_sql_query` method. This method takes the same arguments as `sql_query` and plots the results in a grid.
|
185 |
+
|
186 |
+
!!! Example "Plotting SQL Query Results"
|
187 |
+
|
188 |
+
```python
|
189 |
+
from ultralytics import Explorer
|
190 |
+
|
191 |
+
# create an Explorer object
|
192 |
+
exp = Explorer(data='coco128.yaml', model='yolov8n.pt')
|
193 |
+
exp.create_embeddings_table()
|
194 |
+
|
195 |
+
# plot the SQL Query
|
196 |
+
exp.plot_sql_query("WHERE labels LIKE '%person%' AND labels LIKE '%dog%' LIMIT 10")
|
197 |
+
```
|
198 |
+
|
199 |
+
## 4. Advanced - Working with Embeddings Table
|
200 |
+
|
201 |
+
You can also work with the embeddings table directly. Once the embeddings table is created, you can access it using `Explorer.table`.
|
202 |
+
|
203 |
+
!!! Tip "Explorer works on [LanceDB](https://lancedb.github.io/lancedb/) tables internally. You can access this table directly, using `Explorer.table` object and run raw queries, push down pre- and post-filters, etc."
|
204 |
+
|
205 |
+
```python
|
206 |
+
from ultralytics import Explorer
|
207 |
+
|
208 |
+
exp = Explorer()
|
209 |
+
exp.create_embeddings_table()
|
210 |
+
table = exp.table
|
211 |
+
```
|
212 |
+
|
213 |
+
Here are some examples of what you can do with the table:
|
214 |
+
|
215 |
+
### Get raw Embeddings
|
216 |
+
|
217 |
+
!!! Example
|
218 |
+
|
219 |
+
```python
|
220 |
+
from ultralytics import Explorer
|
221 |
+
|
222 |
+
exp = Explorer()
|
223 |
+
exp.create_embeddings_table()
|
224 |
+
table = exp.table
|
225 |
+
|
226 |
+
embeddings = table.to_pandas()["vector"]
|
227 |
+
print(embeddings)
|
228 |
+
```
|
229 |
+
|
230 |
+
### Advanced Querying with pre- and post-filters
|
231 |
+
|
232 |
+
!!! Example
|
233 |
+
|
234 |
+
```python
|
235 |
+
from ultralytics import Explorer
|
236 |
+
|
237 |
+
exp = Explorer(model="yolov8n.pt")
|
238 |
+
exp.create_embeddings_table()
|
239 |
+
table = exp.table
|
240 |
+
|
241 |
+
# Dummy embedding
|
242 |
+
embedding = [i for i in range(256)]
|
243 |
+
rs = table.search(embedding).metric("cosine").where("").limit(10)
|
244 |
+
```
|
245 |
+
|
246 |
+
### Create Vector Index
|
247 |
+
|
248 |
+
When using large datasets, you can also create a dedicated vector index for faster querying. This is done using the `create_index` method on LanceDB table.
|
249 |
+
|
250 |
+
```python
|
251 |
+
table.create_index(num_partitions=..., num_sub_vectors=...)
|
252 |
+
```
|
253 |
+
|
254 |
+
Find more details on the types of vector indices available and their parameters [here](https://lancedb.github.io/lancedb/ann_indexes/#types-of-index). In the future, we will add support for creating vector indices directly from the Explorer API.
|
255 |
+
|
256 |
+
## 5. Embeddings Applications
|
257 |
+
|
258 |
+
You can use the embeddings table to perform a variety of exploratory analysis. Here are some examples:
|
259 |
+
|
260 |
+
### Similarity Index
|
261 |
+
|
262 |
+
Explorer comes with a `similarity_index` operation:
|
263 |
+
|
264 |
+
- It tries to estimate how similar each data point is with the rest of the dataset.
|
265 |
+
- It does that by counting how many image embeddings lie closer than `max_dist` to the current image in the generated embedding space, considering `top_k` similar images at a time.
|
266 |
+
|
267 |
+
It returns a pandas dataframe with the following columns:
|
268 |
+
|
269 |
+
- `idx`: Index of the image in the dataset
|
270 |
+
- `im_file`: Path to the image file
|
271 |
+
- `count`: Number of images in the dataset that are closer than `max_dist` to the current image
|
272 |
+
- `sim_im_files`: List of paths to the `count` similar images
|
273 |
+
|
274 |
+
!!! Tip
|
275 |
+
|
276 |
+
For a given dataset, model, `max_dist` & `top_k`, the similarity index, once generated, will be reused. In case your dataset has changed, or you simply need to regenerate the similarity index, you can pass `force=True`.
|
277 |
+
|
278 |
+
!!! Example "Similarity Index"
|
279 |
+
|
280 |
+
```python
|
281 |
+
from ultralytics import Explorer
|
282 |
+
|
283 |
+
exp = Explorer()
|
284 |
+
exp.create_embeddings_table()
|
285 |
+
|
286 |
+
sim_idx = exp.similarity_index()
|
287 |
+
```
|
288 |
+
|
289 |
+
You can use the similarity index to build custom conditions to filter the dataset. For example, the following code selects the images that have more than 30 similar images (closer than `max_dist`) in the dataset:
|
290 |
+
|
291 |
+
```python
|
292 |
+
import numpy as np
|
293 |
+
|
294 |
+
sim_count = np.array(sim_idx["count"])
|
295 |
+
sim_idx['im_file'][sim_count > 30]
|
296 |
+
```
|
297 |
+
|
298 |
+
### Visualize Embedding Space
|
299 |
+
|
300 |
+
You can also visualize the embedding space using the plotting tool of your choice. Here is a simple example using matplotlib:
|
301 |
+
|
302 |
+
```python
|
303 |
+
import numpy as np
|
304 |
+
from sklearn.decomposition import PCA
|
305 |
+
import matplotlib.pyplot as plt
|
306 |
+
from mpl_toolkits.mplot3d import Axes3D
|
307 |
+
|
308 |
+
# Reduce dimensions using PCA to 3 components for visualization in 3D
|
309 |
+
pca = PCA(n_components=3)
|
310 |
+
reduced_data = pca.fit_transform(embeddings)
|
311 |
+
|
312 |
+
# Create a 3D scatter plot using Matplotlib Axes3D
|
313 |
+
fig = plt.figure(figsize=(8, 6))
|
314 |
+
ax = fig.add_subplot(111, projection='3d')
|
315 |
+
|
316 |
+
# Scatter plot
|
317 |
+
ax.scatter(reduced_data[:, 0], reduced_data[:, 1], reduced_data[:, 2], alpha=0.5)
|
318 |
+
ax.set_title('3D Scatter Plot of Reduced 256-Dimensional Data (PCA)')
|
319 |
+
ax.set_xlabel('Component 1')
|
320 |
+
ax.set_ylabel('Component 2')
|
321 |
+
ax.set_zlabel('Component 3')
|
322 |
+
|
323 |
+
plt.show()
|
324 |
+
```
|
325 |
+
|
326 |
+
Start creating your own CV dataset exploration reports using the Explorer API. For inspiration, check out the apps built using Ultralytics Explorer below.
|
327 |
+
|
328 |
+
## Apps Built Using Ultralytics Explorer
|
329 |
+
|
330 |
+
Try our GUI Demo based on Explorer API
|
331 |
+
|
332 |
+
## Coming Soon
|
333 |
+
|
334 |
+
- [ ] Merge specific labels from datasets. Example - Import all `person` labels from COCO and `car` labels from Cityscapes
|
335 |
+
- [ ] Remove images that have a higher similarity index than the given threshold
|
336 |
+
- [ ] Automatically persist new datasets after merging/removing entries
|
337 |
+
- [ ] Advanced Dataset Visualizations
|
docs/en/datasets/explorer/dashboard.md
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Learn about Ultralytics Explorer GUI for semantic search, SQL queries, and AI-powered natural language search in CV datasets.
|
4 |
+
keywords: Ultralytics, Explorer GUI, semantic search, vector similarity search, AI queries, SQL queries, computer vision, dataset exploration, image search, OpenAI integration
|
5 |
+
---
|
6 |
+
|
7 |
+
# Explorer GUI
|
8 |
+
|
9 |
+
Explorer GUI is like a playground built using the [Ultralytics Explorer API](api.md). It allows you to run semantic/vector similarity searches, SQL queries, and even natural language searches using our Ask AI feature powered by LLMs.
|
10 |
+
|
11 |
+
<p>
|
12 |
+
<img width="1709" alt="Explorer Dashboard Screenshot 1" src="https://github.com/ultralytics/ultralytics/assets/15766192/feb1fe05-58c5-4173-a9ff-e611e3bba3d0">
|
13 |
+
</p>
|
14 |
+
|
15 |
+
<p align="center">
|
16 |
+
<br>
|
17 |
+
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/3VryynorQeo?start=306"
|
18 |
+
title="YouTube video player" frameborder="0"
|
19 |
+
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
|
20 |
+
allowfullscreen>
|
21 |
+
</iframe>
|
22 |
+
<br>
|
23 |
+
<strong>Watch:</strong> Ultralytics Explorer Dashboard Overview
|
24 |
+
</p>
|
25 |
+
|
26 |
+
### Installation
|
27 |
+
|
28 |
+
```bash
|
29 |
+
pip install ultralytics[explorer]
|
30 |
+
```
|
31 |
+
|
32 |
+
!!! note "Note"
|
33 |
+
|
34 |
+
Ask AI feature works using OpenAI, so you'll be prompted to set the api key for OpenAI when you first run the GUI.
|
35 |
+
You can set it like this - `yolo settings openai_api_key="..."`
|
36 |
+
|
37 |
+
## Semantic Search / Vector Similarity Search
|
38 |
+
|
39 |
+
Semantic search is a technique for finding similar images to a given image. It is based on the idea that similar images will have similar embeddings. In the UI, you can select one or more images and search for the images similar to them. This can be useful when you want to find images similar to a given image or a set of images that don't perform as expected.
|
40 |
+
|
41 |
+
For example:
|
42 |
+
In this VOC Exploration dashboard, the user selects a couple of airplane images like this:
|
43 |
+
<p>
|
44 |
+
<img width="1710" alt="Explorer Dashboard Screenshot 2" src="https://github.com/RizwanMunawar/RizwanMunawar/assets/62513924/3becdc1d-45dc-43b7-88ff-84ff0b443894">
|
45 |
+
</p>
|
46 |
+
|
47 |
+
On performing similarity search, you should see a similar result:
|
48 |
+
<p>
|
49 |
+
<img width="1710" alt="Explorer Dashboard Screenshot 3" src="https://github.com/RizwanMunawar/RizwanMunawar/assets/62513924/aeea2e16-bc2b-41bb-9aef-4a33bfa1a800">
|
50 |
+
</p>
|
51 |
+
|
52 |
+
## Ask AI
|
53 |
+
|
54 |
+
This allows you to describe how you want to filter your dataset using natural language. You don't have to be proficient in writing SQL queries. Our AI-powered query generator will automatically do that under the hood. For example, you can say "show me 100 images with exactly one person and 2 dogs. There can be other objects too" and it'll internally generate the query and show you those results. When asked to "Show 10 images with exactly 5 persons", you'll see a result like this:
|
55 |
+
<p>
|
56 |
+
<img width="1709" alt="Explorer Dashboard Screenshot 4" src="https://github.com/RizwanMunawar/RizwanMunawar/assets/62513924/55a67181-3b25-4d2f-b786-2a6a08a0cb6b">
|
57 |
+
</p>
|
58 |
+
|
59 |
+
Note: This works using LLMs under the hood, so the results are probabilistic and might get things wrong sometimes.
|
60 |
+
|
61 |
+
## Run SQL queries on your CV datasets
|
62 |
+
|
63 |
+
You can run SQL queries on your dataset to filter it. It also works if you only provide the WHERE clause. The following example SQL query shows only the images that have at least one person and one dog in them:
|
64 |
+
|
65 |
+
```sql
|
66 |
+
WHERE labels LIKE '%person%' AND labels LIKE '%dog%'
|
67 |
+
```
|
68 |
+
|
69 |
+
<p>
|
70 |
+
<img width="1707" alt="Explorer Dashboard Screenshot 5" src="https://github.com/RizwanMunawar/RizwanMunawar/assets/62513924/14fbb237-0b2d-4b7c-8f62-2fca4e6cc26f">
|
71 |
+
</p>
|
72 |
+
|
73 |
+
This is a demo built using the Explorer API. You can use the API to build your own exploratory notebooks or scripts to get insights into your datasets. Learn more about the Explorer API [here](api.md).
|
docs/en/datasets/explorer/explorer.ipynb
ADDED
@@ -0,0 +1,601 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "aa923c26-81c8-4565-9277-1cb686e3702e",
|
6 |
+
"metadata": {
|
7 |
+
"id": "aa923c26-81c8-4565-9277-1cb686e3702e"
|
8 |
+
},
|
9 |
+
"source": [
|
10 |
+
"# VOC Exploration Example\n",
|
11 |
+
"<div align=\"center\">\n",
|
12 |
+
"\n",
|
13 |
+
" <a href=\"https://ultralytics.com/yolov8\" target=\"_blank\">\n",
|
14 |
+
" <img width=\"1024\", src=\"https://raw.githubusercontent.com/ultralytics/assets/main/yolov8/banner-yolov8.png\"></a>\n",
|
15 |
+
"\n",
|
16 |
+
" [中文](https://docs.ultralytics.com/zh/) | [한국어](https://docs.ultralytics.com/ko/) | [日本語](https://docs.ultralytics.com/ja/) | [Русский](https://docs.ultralytics.com/ru/) | [Deutsch](https://docs.ultralytics.com/de/) | [Français](https://docs.ultralytics.com/fr/) | [Español](https://docs.ultralytics.com/es/) | [Português](https://docs.ultralytics.com/pt/) | [हिन्दी](https://docs.ultralytics.com/hi/) | [العربية](https://docs.ultralytics.com/ar/)\n",
|
17 |
+
"\n",
|
18 |
+
" <a href=\"https://console.paperspace.com/github/ultralytics/ultralytics\"><img src=\"https://assets.paperspace.io/img/gradient-badge.svg\" alt=\"Run on Gradient\"/></a>\n",
|
19 |
+
" <a href=\"https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/examples/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
|
20 |
+
" <a href=\"https://www.kaggle.com/ultralytics/yolov8\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
|
21 |
+
"\n",
|
22 |
+
"Welcome to the Ultralytics Explorer API notebook! This notebook serves as the starting point for exploring the various resources available to help you get started with using Ultralytics to explore your datasets with the power of semantic search. You can use utilities out of the box that allow you to examine specific types of labels using vector search or even SQL queries.\n",
|
23 |
+
"\n",
|
24 |
+
"We hope that the resources in this notebook will help you get the most out of Ultralytics. Please browse the Explorer <a href=\"https://docs.ultralytics.com/\">Docs</a> for details, raise an issue on <a href=\"https://github.com/ultralytics/ultralytics\">GitHub</a> for support, and join our <a href=\"https://ultralytics.com/discord\">Discord</a> community for questions and discussions!\n",
|
25 |
+
"\n",
|
26 |
+
"Try `yolo explorer` powered by Explorer API\n",
|
27 |
+
"\n",
|
28 |
+
"Simply `pip install ultralytics` and run `yolo explorer` in your terminal to run custom queries and semantic search on your datasets right inside your browser!\n",
|
29 |
+
"\n",
|
30 |
+
"</div>"
|
31 |
+
]
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"cell_type": "markdown",
|
35 |
+
"id": "2454d9ba-9db4-4b37-98e8-201ba285c92f",
|
36 |
+
"metadata": {
|
37 |
+
"id": "2454d9ba-9db4-4b37-98e8-201ba285c92f"
|
38 |
+
},
|
39 |
+
"source": [
|
40 |
+
"## Setup\n",
|
41 |
+
"Pip install `ultralytics` and [dependencies](https://github.com/ultralytics/ultralytics/blob/main/pyproject.toml) and check software and hardware."
|
42 |
+
]
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"cell_type": "code",
|
46 |
+
"execution_count": null,
|
47 |
+
"id": "433f3a4d-a914-42cb-b0b6-be84a84e5e41",
|
48 |
+
"metadata": {
|
49 |
+
"id": "433f3a4d-a914-42cb-b0b6-be84a84e5e41"
|
50 |
+
},
|
51 |
+
"outputs": [],
|
52 |
+
"source": [
|
53 |
+
"%pip install ultralytics[explorer] openai\n",
|
54 |
+
"import ultralytics\n",
|
55 |
+
"ultralytics.checks()"
|
56 |
+
]
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"cell_type": "code",
|
60 |
+
"execution_count": null,
|
61 |
+
"id": "ae602549-3419-4909-9f82-35cba515483f",
|
62 |
+
"metadata": {
|
63 |
+
"id": "ae602549-3419-4909-9f82-35cba515483f"
|
64 |
+
},
|
65 |
+
"outputs": [],
|
66 |
+
"source": [
|
67 |
+
"from ultralytics import Explorer"
|
68 |
+
]
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"cell_type": "markdown",
|
72 |
+
"id": "d8c06350-be8e-45cf-b3a6-b5017bbd943c",
|
73 |
+
"metadata": {
|
74 |
+
"id": "d8c06350-be8e-45cf-b3a6-b5017bbd943c"
|
75 |
+
},
|
76 |
+
"source": [
|
77 |
+
"## Similarity search\n",
|
78 |
+
"Utilize the power of vector similarity search to find the similar data points in your dataset along with their distance in the embedding space. Simply create an embeddings table for the given dataset-model pair. It is only needed once and it is reused automatically.\n"
|
79 |
+
]
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"cell_type": "code",
|
83 |
+
"execution_count": null,
|
84 |
+
"id": "334619da-6deb-4b32-9fe0-74e0a79cee20",
|
85 |
+
"metadata": {
|
86 |
+
"id": "334619da-6deb-4b32-9fe0-74e0a79cee20"
|
87 |
+
},
|
88 |
+
"outputs": [],
|
89 |
+
"source": [
|
90 |
+
"exp = Explorer(\"VOC.yaml\", model=\"yolov8n.pt\")\n",
|
91 |
+
"exp.create_embeddings_table()"
|
92 |
+
]
|
93 |
+
},
|
94 |
+
{
|
95 |
+
"cell_type": "markdown",
|
96 |
+
"id": "b6c5e42d-bc7e-4b4c-bde0-643072a2165d",
|
97 |
+
"metadata": {
|
98 |
+
"id": "b6c5e42d-bc7e-4b4c-bde0-643072a2165d"
|
99 |
+
},
|
100 |
+
"source": [
|
101 |
+
"Once the embeddings table is built, you can run semantic search in any of the following ways:\n",
|
102 |
+
"- On a given index / list of indices in the dataset like - `exp.get_similar(idx=[1,10], limit=10)`\n",
|
103 |
+
"- On any image/ list of images not in the dataset - `exp.get_similar(img=[\"path/to/img1\", \"path/to/img2\"], limit=10)`\n",
|
104 |
+
"In case of multiple inputs, the aggregate of their embeddings is used.\n",
|
105 |
+
"\n",
|
106 |
+
"You get a pandas dataframe with the `limit` number of most similar data points to the input, along with their distance in the embedding space. You can use this dataset to perform further filtering\n",
|
107 |
+
"<img width=\"1120\" alt=\"Screenshot 2024-01-06 at 9 45 42 PM\" src=\"https://github.com/AyushExel/assets/assets/15766192/7742ac57-e22a-4cea-a0f9-2b2a257483c5\">\n"
|
108 |
+
]
|
109 |
+
},
|
110 |
+
{
|
111 |
+
"cell_type": "code",
|
112 |
+
"execution_count": null,
|
113 |
+
"id": "b485f05b-d92d-42bc-8da7-5e361667b341",
|
114 |
+
"metadata": {
|
115 |
+
"id": "b485f05b-d92d-42bc-8da7-5e361667b341"
|
116 |
+
},
|
117 |
+
"outputs": [],
|
118 |
+
"source": [
|
119 |
+
"similar = exp.get_similar(idx=1, limit=10)\n",
|
120 |
+
"similar.head()"
|
121 |
+
]
|
122 |
+
},
|
123 |
+
{
|
124 |
+
"cell_type": "markdown",
|
125 |
+
"id": "acf4b489-2161-4176-a1fe-d1d067d8083d",
|
126 |
+
"metadata": {
|
127 |
+
"id": "acf4b489-2161-4176-a1fe-d1d067d8083d"
|
128 |
+
},
|
129 |
+
"source": [
|
130 |
+
"You can also plot the similar samples directly using the `plot_similar` util\n",
|
131 |
+
"<p>\n",
|
132 |
+
"\n",
|
133 |
+
" <img src=\"https://github.com/AyushExel/assets/assets/15766192/a3c9247b-9271-47df-aaa5-36d96c5034b1\" />\n",
|
134 |
+
"</p>\n"
|
135 |
+
]
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"cell_type": "code",
|
139 |
+
"execution_count": null,
|
140 |
+
"id": "9dbfe7d0-8613-4529-adb6-6e0632d7cce7",
|
141 |
+
"metadata": {
|
142 |
+
"id": "9dbfe7d0-8613-4529-adb6-6e0632d7cce7"
|
143 |
+
},
|
144 |
+
"outputs": [],
|
145 |
+
"source": [
|
146 |
+
"exp.plot_similar(idx=6500, limit=20)\n",
|
147 |
+
"#exp.plot_similar(idx=[100,101], limit=10) # Can also pass list of idxs or imgs\n"
|
148 |
+
]
|
149 |
+
},
|
150 |
+
{
|
151 |
+
"cell_type": "code",
|
152 |
+
"execution_count": null,
|
153 |
+
"id": "260e09bf-4960-4089-a676-cb0e76ff3c0d",
|
154 |
+
"metadata": {
|
155 |
+
"id": "260e09bf-4960-4089-a676-cb0e76ff3c0d"
|
156 |
+
},
|
157 |
+
"outputs": [],
|
158 |
+
"source": [
|
159 |
+
"exp.plot_similar(img=\"https://ultralytics.com/images/bus.jpg\", limit=10, labels=False) # Can also pass any external images\n"
|
160 |
+
]
|
161 |
+
},
|
162 |
+
{
|
163 |
+
"cell_type": "markdown",
|
164 |
+
"id": "faa0b7a7-6318-40e4-b0f4-45a8113bdc3a",
|
165 |
+
"metadata": {
|
166 |
+
"id": "faa0b7a7-6318-40e4-b0f4-45a8113bdc3a"
|
167 |
+
},
|
168 |
+
"source": [
|
169 |
+
"<p>\n",
|
170 |
+
"<img src=\"https://github.com/AyushExel/assets/assets/15766192/8e011195-b0da-43ef-b3cd-5fb6f383037e\">\n",
|
171 |
+
"\n",
|
172 |
+
"</p>"
|
173 |
+
]
|
174 |
+
},
|
175 |
+
{
|
176 |
+
"cell_type": "markdown",
|
177 |
+
"id": "0cea63f1-71f1-46da-af2b-b1b7d8f73553",
|
178 |
+
"metadata": {
|
179 |
+
"id": "0cea63f1-71f1-46da-af2b-b1b7d8f73553"
|
180 |
+
},
|
181 |
+
"source": [
|
182 |
+
"## 2. Ask AI: Search or filter with Natural Language\n",
|
183 |
+
"You can prompt the Explorer object with the kind of data points you want to see and it'll try to return a dataframe with those. Because it is powered by LLMs, it doesn't always get it right. In that case, it'll return None.\n",
|
184 |
+
"<p>\n",
|
185 |
+
"<img width=\"1131\" alt=\"Screenshot 2024-01-07 at 2 34 53 PM\" src=\"https://github.com/AyushExel/assets/assets/15766192/c4a69fd9-e54f-4d6a-aba5-dc9cfae1bc04\">\n",
|
186 |
+
"\n",
|
187 |
+
"</p>\n",
|
188 |
+
"\n"
|
189 |
+
]
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"cell_type": "code",
|
193 |
+
"execution_count": null,
|
194 |
+
"id": "92fb92ac-7f76-465a-a9ba-ea7492498d9c",
|
195 |
+
"metadata": {
|
196 |
+
"id": "92fb92ac-7f76-465a-a9ba-ea7492498d9c"
|
197 |
+
},
|
198 |
+
"outputs": [],
|
199 |
+
"source": [
|
200 |
+
"df = exp.ask_ai(\"show me images containing more than 10 objects with at least 2 persons\")\n",
|
201 |
+
"df.head(5)"
|
202 |
+
]
|
203 |
+
},
|
204 |
+
{
|
205 |
+
"cell_type": "markdown",
|
206 |
+
"id": "f2a7d26e-0ce5-4578-ad1a-b1253805280f",
|
207 |
+
"metadata": {
|
208 |
+
"id": "f2a7d26e-0ce5-4578-ad1a-b1253805280f"
|
209 |
+
},
|
210 |
+
"source": [
|
211 |
+
"for plotting these results you can use `plot_query_result` util\n",
|
212 |
+
"Example:\n",
|
213 |
+
"```\n",
|
214 |
+
"plt = plot_query_result(exp.ask_ai(\"show me 10 images containing exactly 2 persons\"))\n",
|
215 |
+
"Image.fromarray(plt)\n",
|
216 |
+
"```\n",
|
217 |
+
"<p>\n",
|
218 |
+
" <img src=\"https://github.com/AyushExel/assets/assets/15766192/2cb780de-d05b-4412-a526-7f7f0f10e669\">\n",
|
219 |
+
"\n",
|
220 |
+
"</p>"
|
221 |
+
]
|
222 |
+
},
|
223 |
+
{
|
224 |
+
"cell_type": "code",
|
225 |
+
"execution_count": null,
|
226 |
+
"id": "b1cfab84-9835-4da0-8e9a-42b30cf84511",
|
227 |
+
"metadata": {
|
228 |
+
"id": "b1cfab84-9835-4da0-8e9a-42b30cf84511"
|
229 |
+
},
|
230 |
+
"outputs": [],
|
231 |
+
"source": [
|
232 |
+
"# plot\n",
|
233 |
+
"from ultralytics.data.explorer import plot_query_result\n",
|
234 |
+
"from PIL import Image\n",
|
235 |
+
"\n",
|
236 |
+
"plt = plot_query_result(exp.ask_ai(\"show me 10 images containing exactly 2 persons\"))\n",
|
237 |
+
"Image.fromarray(plt)"
|
238 |
+
]
|
239 |
+
},
|
240 |
+
{
|
241 |
+
"cell_type": "markdown",
|
242 |
+
"id": "35315ae6-d827-40e4-8813-279f97a83b34",
|
243 |
+
"metadata": {
|
244 |
+
"id": "35315ae6-d827-40e4-8813-279f97a83b34"
|
245 |
+
},
|
246 |
+
"source": [
|
247 |
+
"## 3. Run SQL queries on your Dataset!\n",
|
248 |
+
"Sometimes you might want to investigate a certain type of entries in your dataset. For this Explorer allows you to execute SQL queries.\n",
|
249 |
+
"It accepts either of the formats:\n",
|
250 |
+
"- Queries beginning with \"WHERE\" will automatically select all columns. This can be thought of as a short-hand query\n",
|
251 |
+
"- You can also write full queries where you can specify which columns to select\n",
|
252 |
+
"\n",
|
253 |
+
"This can be used to investigate model performance and specific data points. For example:\n",
|
254 |
+
"- let's say your model struggles on images that have humans and dogs. You can write a query like this to select the points that have at least 2 humans AND at least one dog.\n",
|
255 |
+
"\n",
|
256 |
+
"You can combine SQL query and semantic search to filter down to specific type of results\n",
|
257 |
+
"<img width=\"994\" alt=\"Screenshot 2024-01-06 at 9 47 30 PM\" src=\"https://github.com/AyushExel/assets/assets/15766192/92bc3178-c151-4cd5-8007-c76178deb113\">\n"
|
258 |
+
]
|
259 |
+
},
|
260 |
+
{
|
261 |
+
"cell_type": "code",
|
262 |
+
"execution_count": null,
|
263 |
+
"id": "8cd1072f-3100-4331-a0e3-4e2f6b1005bf",
|
264 |
+
"metadata": {
|
265 |
+
"id": "8cd1072f-3100-4331-a0e3-4e2f6b1005bf"
|
266 |
+
},
|
267 |
+
"outputs": [],
|
268 |
+
"source": [
|
269 |
+
"table = exp.sql_query(\"WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10\")\n",
|
270 |
+
"table"
|
271 |
+
]
|
272 |
+
},
|
273 |
+
{
|
274 |
+
"cell_type": "markdown",
|
275 |
+
"id": "debf8a00-c9f6-448b-bd3b-454cf62f39ab",
|
276 |
+
"metadata": {
|
277 |
+
"id": "debf8a00-c9f6-448b-bd3b-454cf62f39ab"
|
278 |
+
},
|
279 |
+
"source": [
|
280 |
+
"Just like similarity search, you also get a util to directly plot the sql queries using `exp.plot_sql_query`\n",
|
281 |
+
"<img src=\"https://github.com/AyushExel/assets/assets/15766192/f8b66629-8dd0-419e-8f44-9837969ba678\">\n"
|
282 |
+
]
|
283 |
+
},
|
284 |
+
{
|
285 |
+
"cell_type": "code",
|
286 |
+
"execution_count": null,
|
287 |
+
"id": "18b977e7-d048-4b22-b8c4-084a03b04f23",
|
288 |
+
"metadata": {
|
289 |
+
"id": "18b977e7-d048-4b22-b8c4-084a03b04f23"
|
290 |
+
},
|
291 |
+
"outputs": [],
|
292 |
+
"source": [
|
293 |
+
"exp.plot_sql_query(\"WHERE labels LIKE '%person, person%' AND labels LIKE '%dog%' LIMIT 10\", labels=True)"
|
294 |
+
]
|
295 |
+
},
|
296 |
+
{
|
297 |
+
"cell_type": "markdown",
|
298 |
+
"id": "f26804c5-840b-4fd1-987f-e362f29e3e06",
|
299 |
+
"metadata": {
|
300 |
+
"id": "f26804c5-840b-4fd1-987f-e362f29e3e06"
|
301 |
+
},
|
302 |
+
"source": [
|
303 |
+
"## 3. Working with embeddings Table (Advanced)\n",
|
304 |
+
"Explorer works on [LanceDB](https://lancedb.github.io/lancedb/) tables internally. You can access this table directly, using `Explorer.table` object and run raw queries, push down pre and post filters, etc."
|
305 |
+
]
|
306 |
+
},
|
307 |
+
{
|
308 |
+
"cell_type": "code",
|
309 |
+
"execution_count": null,
|
310 |
+
"id": "ea69260a-3407-40c9-9f42-8b34a6e6af7a",
|
311 |
+
"metadata": {
|
312 |
+
"id": "ea69260a-3407-40c9-9f42-8b34a6e6af7a"
|
313 |
+
},
|
314 |
+
"outputs": [],
|
315 |
+
"source": [
|
316 |
+
"table = exp.table\n",
|
317 |
+
"table.schema"
|
318 |
+
]
|
319 |
+
},
|
320 |
+
{
|
321 |
+
"cell_type": "markdown",
|
322 |
+
"id": "238db292-8610-40b3-9af7-dfd6be174892",
|
323 |
+
"metadata": {
|
324 |
+
"id": "238db292-8610-40b3-9af7-dfd6be174892"
|
325 |
+
},
|
326 |
+
"source": [
|
327 |
+
"### Run raw queries\n",
|
328 |
+
"Vector Search finds the nearest vectors from the database. In a recommendation system or search engine, you can find similar products from the one you searched. In LLM and other AI applications, each data point can be represented by the embeddings generated from some model, and vector search returns the most relevant features.\n",
|
329 |
+
"\n",
|
330 |
+
"A search in high-dimensional vector space finds the K-Nearest-Neighbors (KNN) of the query vector.\n",
|
331 |
+
"\n",
|
332 |
+
"Metric\n",
|
333 |
+
"In LanceDB, a Metric is the way to describe the distance between a pair of vectors. Currently, it supports the following metrics:\n",
|
334 |
+
"- L2\n",
|
335 |
+
"- Cosine\n",
|
336 |
+
"- Dot\n",
|
337 |
+
"Explorer's similarity search uses L2 by default. You can run queries on tables directly, or use the lance format to build custom utilities to manage datasets. More details on available LanceDB table ops in the [docs](https://lancedb.github.io/lancedb/)\n",
|
338 |
+
"\n",
|
339 |
+
"<img width=\"1015\" alt=\"Screenshot 2024-01-06 at 9 48 35 PM\" src=\"https://github.com/AyushExel/assets/assets/15766192/a2ccdaf3-8877-4f70-bf47-8a9bd2bb20c0\">\n"
|
340 |
+
]
|
341 |
+
},
|
342 |
+
{
|
343 |
+
"cell_type": "code",
|
344 |
+
"execution_count": null,
|
345 |
+
"id": "d74430fe-5aee-45a1-8863-3f2c31338792",
|
346 |
+
"metadata": {
|
347 |
+
"id": "d74430fe-5aee-45a1-8863-3f2c31338792"
|
348 |
+
},
|
349 |
+
"outputs": [],
|
350 |
+
"source": [
|
351 |
+
"dummy_img_embedding = [i for i in range(256)]\n",
|
352 |
+
"table.search(dummy_img_embedding).limit(5).to_pandas()"
|
353 |
+
]
|
354 |
+
},
|
355 |
+
{
|
356 |
+
"cell_type": "markdown",
|
357 |
+
"id": "587486b4-0d19-4214-b994-f032fb2e8eb5",
|
358 |
+
"metadata": {
|
359 |
+
"id": "587486b4-0d19-4214-b994-f032fb2e8eb5"
|
360 |
+
},
|
361 |
+
"source": [
|
362 |
+
"### Inter-conversion to popular data formats"
|
363 |
+
]
|
364 |
+
},
|
365 |
+
{
|
366 |
+
"cell_type": "code",
|
367 |
+
"execution_count": null,
|
368 |
+
"id": "bb2876ea-999b-4eba-96bc-c196ba02c41c",
|
369 |
+
"metadata": {
|
370 |
+
"id": "bb2876ea-999b-4eba-96bc-c196ba02c41c"
|
371 |
+
},
|
372 |
+
"outputs": [],
|
373 |
+
"source": [
|
374 |
+
"df = table.to_pandas()\n",
|
375 |
+
"pa_table = table.to_arrow()\n"
|
376 |
+
]
|
377 |
+
},
|
378 |
+
{
|
379 |
+
"cell_type": "markdown",
|
380 |
+
"id": "42659d63-ad76-49d6-8dfc-78d77278db72",
|
381 |
+
"metadata": {
|
382 |
+
"id": "42659d63-ad76-49d6-8dfc-78d77278db72"
|
383 |
+
},
|
384 |
+
"source": [
|
385 |
+
"### Work with Embeddings\n",
|
386 |
+
"You can access the raw embedding from lancedb Table and analyse it. The image embeddings are stored in column `vector`"
|
387 |
+
]
|
388 |
+
},
|
389 |
+
{
|
390 |
+
"cell_type": "code",
|
391 |
+
"execution_count": null,
|
392 |
+
"id": "66d69e9b-046e-41c8-80d7-c0ee40be3bca",
|
393 |
+
"metadata": {
|
394 |
+
"id": "66d69e9b-046e-41c8-80d7-c0ee40be3bca"
|
395 |
+
},
|
396 |
+
"outputs": [],
|
397 |
+
"source": [
|
398 |
+
"import numpy as np\n",
|
399 |
+
"\n",
|
400 |
+
"embeddings = table.to_pandas()[\"vector\"].tolist()\n",
|
401 |
+
"embeddings = np.array(embeddings)"
|
402 |
+
]
|
403 |
+
},
|
404 |
+
{
|
405 |
+
"cell_type": "markdown",
|
406 |
+
"id": "e8df0a49-9596-4399-954b-b8ae1fd7a602",
|
407 |
+
"metadata": {
|
408 |
+
"id": "e8df0a49-9596-4399-954b-b8ae1fd7a602"
|
409 |
+
},
|
410 |
+
"source": [
|
411 |
+
"### Scatterplot\n",
|
412 |
+
"One of the preliminary steps in analysing embeddings is by plotting them in 2D space via dimensionality reduction. Let's try an example\n",
|
413 |
+
"\n",
|
414 |
+
"<img width=\"646\" alt=\"Screenshot 2024-01-06 at 9 48 58 PM\" src=\"https://github.com/AyushExel/assets/assets/15766192/9e1da25c-face-4426-abc0-2f64a4e4952c\">\n"
|
415 |
+
]
|
416 |
+
},
|
417 |
+
{
|
418 |
+
"cell_type": "code",
|
419 |
+
"execution_count": null,
|
420 |
+
"id": "d9a150e8-8092-41b3-82f8-2247f8187fc8",
|
421 |
+
"metadata": {
|
422 |
+
"id": "d9a150e8-8092-41b3-82f8-2247f8187fc8"
|
423 |
+
},
|
424 |
+
"outputs": [],
|
425 |
+
"source": [
|
426 |
+
"!pip install scikit-learn --q"
|
427 |
+
]
|
428 |
+
},
|
429 |
+
{
|
430 |
+
"cell_type": "code",
|
431 |
+
"execution_count": null,
|
432 |
+
"id": "196079c3-45a9-4325-81ab-af79a881e37a",
|
433 |
+
"metadata": {
|
434 |
+
"id": "196079c3-45a9-4325-81ab-af79a881e37a"
|
435 |
+
},
|
436 |
+
"outputs": [],
|
437 |
+
"source": [
|
438 |
+
"%matplotlib inline\n",
|
439 |
+
"import numpy as np\n",
|
440 |
+
"from sklearn.decomposition import PCA\n",
|
441 |
+
"import matplotlib.pyplot as plt\n",
|
442 |
+
"from mpl_toolkits.mplot3d import Axes3D\n",
|
443 |
+
"\n",
|
444 |
+
"# Reduce dimensions using PCA to 3 components for visualization in 3D\n",
|
445 |
+
"pca = PCA(n_components=3)\n",
|
446 |
+
"reduced_data = pca.fit_transform(embeddings)\n",
|
447 |
+
"\n",
|
448 |
+
"# Create a 3D scatter plot using Matplotlib's Axes3D\n",
|
449 |
+
"fig = plt.figure(figsize=(8, 6))\n",
|
450 |
+
"ax = fig.add_subplot(111, projection='3d')\n",
|
451 |
+
"\n",
|
452 |
+
"# Scatter plot\n",
|
453 |
+
"ax.scatter(reduced_data[:, 0], reduced_data[:, 1], reduced_data[:, 2], alpha=0.5)\n",
|
454 |
+
"ax.set_title('3D Scatter Plot of Reduced 256-Dimensional Data (PCA)')\n",
|
455 |
+
"ax.set_xlabel('Component 1')\n",
|
456 |
+
"ax.set_ylabel('Component 2')\n",
|
457 |
+
"ax.set_zlabel('Component 3')\n",
|
458 |
+
"\n",
|
459 |
+
"plt.show()"
|
460 |
+
]
|
461 |
+
},
|
462 |
+
{
|
463 |
+
"cell_type": "markdown",
|
464 |
+
"id": "1c843c23-e3f2-490e-8d6c-212fa038a149",
|
465 |
+
"metadata": {
|
466 |
+
"id": "1c843c23-e3f2-490e-8d6c-212fa038a149"
|
467 |
+
},
|
468 |
+
"source": [
|
469 |
+
"## 4. Similarity Index\n",
|
470 |
+
"Here's a simple example of an operation powered by the embeddings table. Explorer comes with a `similarity_index` operation-\n",
|
471 |
+
"* It tries to estimate how similar each data point is with the rest of the dataset.\n",
|
472 |
+
"* It does that by counting how many image embeddings lie closer than `max_dist` to the current image in the generated embedding space, considering `top_k` similar images at a time.\n",
|
473 |
+
"\n",
|
474 |
+
"For a given dataset, model, `max_dist` & `top_k` the similarity index once generated will be reused. In case your dataset has changed, or you simply need to regenerate the similarity index, you can pass `force=True`.\n",
|
475 |
+
"Similar to vector and SQL search, this also comes with a util to directly plot it. Let's look at the plot first\n",
|
476 |
+
"<img width=\"633\" alt=\"Screenshot 2024-01-06 at 9 49 36 PM\" src=\"https://github.com/AyushExel/assets/assets/15766192/96a9d984-4a72-4784-ace1-428676ee2bdd\">\n",
|
477 |
+
"\n"
|
478 |
+
]
|
479 |
+
},
|
480 |
+
{
|
481 |
+
"cell_type": "code",
|
482 |
+
"execution_count": null,
|
483 |
+
"id": "953c2a5f-1b61-4acf-a8e4-ed08547dbafc",
|
484 |
+
"metadata": {
|
485 |
+
"id": "953c2a5f-1b61-4acf-a8e4-ed08547dbafc"
|
486 |
+
},
|
487 |
+
"outputs": [],
|
488 |
+
"source": [
|
489 |
+
"exp.plot_similarity_index(max_dist=0.2, top_k=0.01)"
|
490 |
+
]
|
491 |
+
},
|
492 |
+
{
|
493 |
+
"cell_type": "markdown",
|
494 |
+
"id": "28228a9a-b727-45b5-8ca7-8db662c0b937",
|
495 |
+
"metadata": {
|
496 |
+
"id": "28228a9a-b727-45b5-8ca7-8db662c0b937"
|
497 |
+
},
|
498 |
+
"source": [
|
499 |
+
"Now let's look at the output of the operation"
|
500 |
+
]
|
501 |
+
},
|
502 |
+
{
|
503 |
+
"cell_type": "code",
|
504 |
+
"execution_count": null,
|
505 |
+
"id": "f4161aaa-20e6-4df0-8e87-d2293ee0530a",
|
506 |
+
"metadata": {
|
507 |
+
"id": "f4161aaa-20e6-4df0-8e87-d2293ee0530a"
|
508 |
+
},
|
509 |
+
"outputs": [],
|
510 |
+
"source": [
|
511 |
+
"import numpy as np\n",
|
512 |
+
"\n",
|
513 |
+
"sim_idx = exp.similarity_index(max_dist=0.2, top_k=0.01, force=False)"
|
514 |
+
]
|
515 |
+
},
|
516 |
+
{
|
517 |
+
"cell_type": "code",
|
518 |
+
"execution_count": null,
|
519 |
+
"id": "b01d5b1a-9adb-4c3c-a873-217c71527c8d",
|
520 |
+
"metadata": {
|
521 |
+
"id": "b01d5b1a-9adb-4c3c-a873-217c71527c8d"
|
522 |
+
},
|
523 |
+
"outputs": [],
|
524 |
+
"source": [
|
525 |
+
"sim_idx"
|
526 |
+
]
|
527 |
+
},
|
528 |
+
{
|
529 |
+
"cell_type": "markdown",
|
530 |
+
"id": "22b28e54-4fbb-400e-ad8c-7068cbba11c4",
|
531 |
+
"metadata": {
|
532 |
+
"id": "22b28e54-4fbb-400e-ad8c-7068cbba11c4"
|
533 |
+
},
|
534 |
+
"source": [
|
535 |
+
"Let's create a query to see what data points have similarity count of more than 30 and plot images similar to them."
|
536 |
+
]
|
537 |
+
},
|
538 |
+
{
|
539 |
+
"cell_type": "code",
|
540 |
+
"execution_count": null,
|
541 |
+
"id": "58d2557b-d401-43cf-937d-4f554c7bc808",
|
542 |
+
"metadata": {
|
543 |
+
"id": "58d2557b-d401-43cf-937d-4f554c7bc808"
|
544 |
+
},
|
545 |
+
"outputs": [],
|
546 |
+
"source": [
|
547 |
+
"import numpy as np\n",
|
548 |
+
"\n",
|
549 |
+
"sim_count = np.array(sim_idx[\"count\"])\n",
|
550 |
+
"sim_idx['im_file'][sim_count > 30]"
|
551 |
+
]
|
552 |
+
},
|
553 |
+
{
|
554 |
+
"cell_type": "markdown",
|
555 |
+
"id": "a5ec8d76-271a-41ab-ac74-cf8c0084ba5e",
|
556 |
+
"metadata": {
|
557 |
+
"id": "a5ec8d76-271a-41ab-ac74-cf8c0084ba5e"
|
558 |
+
},
|
559 |
+
"source": [
|
560 |
+
"You should see something like this\n",
|
561 |
+
"<img src=\"https://github.com/AyushExel/assets/assets/15766192/649bc366-ca2d-46ea-bfd9-3097cf575584\">\n"
|
562 |
+
]
|
563 |
+
},
|
564 |
+
{
|
565 |
+
"cell_type": "code",
|
566 |
+
"execution_count": null,
|
567 |
+
"id": "3a7b2ee3-9f35-48a2-9c38-38379516f4d2",
|
568 |
+
"metadata": {
|
569 |
+
"id": "3a7b2ee3-9f35-48a2-9c38-38379516f4d2"
|
570 |
+
},
|
571 |
+
"outputs": [],
|
572 |
+
"source": [
|
573 |
+
"exp.plot_similar(idx=[7146, 14035]) # Using avg embeddings of 2 images"
|
574 |
+
]
|
575 |
+
}
|
576 |
+
],
|
577 |
+
"metadata": {
|
578 |
+
"kernelspec": {
|
579 |
+
"display_name": "Python 3 (ipykernel)",
|
580 |
+
"language": "python",
|
581 |
+
"name": "python3"
|
582 |
+
},
|
583 |
+
"language_info": {
|
584 |
+
"codemirror_mode": {
|
585 |
+
"name": "ipython",
|
586 |
+
"version": 3
|
587 |
+
},
|
588 |
+
"file_extension": ".py",
|
589 |
+
"mimetype": "text/x-python",
|
590 |
+
"name": "python",
|
591 |
+
"nbconvert_exporter": "python",
|
592 |
+
"pygments_lexer": "ipython3",
|
593 |
+
"version": "3.9.6"
|
594 |
+
},
|
595 |
+
"colab": {
|
596 |
+
"provenance": []
|
597 |
+
}
|
598 |
+
},
|
599 |
+
"nbformat": 4,
|
600 |
+
"nbformat_minor": 5
|
601 |
+
}
|
docs/en/datasets/explorer/index.md
ADDED
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Discover the Ultralytics Explorer, a versatile tool and Python API for CV dataset exploration, enabling semantic search, SQL queries, and vector similarity searches.
|
4 |
+
keywords: Ultralytics Explorer, CV Dataset Tools, Semantic Search, SQL Dataset Queries, Vector Similarity, Python API, GUI Explorer, Dataset Analysis, YOLO Explorer, Data Insights
|
5 |
+
---
|
6 |
+
|
7 |
+
# Ultralytics Explorer
|
8 |
+
|
9 |
+
<p>
|
10 |
+
<img width="1709" alt="Ultralytics Explorer Screenshot 1" src="https://github.com/ultralytics/ultralytics/assets/15766192/feb1fe05-58c5-4173-a9ff-e611e3bba3d0">
|
11 |
+
</p>
|
12 |
+
|
13 |
+
<a href="https://colab.research.google.com/github/ultralytics/ultralytics/blob/main/docs/en/datasets/explorer/explorer.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
|
14 |
+
Ultralytics Explorer is a tool for exploring CV datasets using semantic search, SQL queries, vector similarity search and even using natural language. It is also a Python API for accessing the same functionality.
|
15 |
+
|
16 |
+
<p align="center">
|
17 |
+
<br>
|
18 |
+
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/3VryynorQeo"
|
19 |
+
title="YouTube video player" frameborder="0"
|
20 |
+
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
|
21 |
+
allowfullscreen>
|
22 |
+
</iframe>
|
23 |
+
<br>
|
24 |
+
<strong>Watch:</strong> Ultralytics Explorer API | Semantic Search, SQL Queries & Ask AI Features
|
25 |
+
</p>
|
26 |
+
|
27 |
+
### Installation of optional dependencies
|
28 |
+
|
29 |
+
Explorer depends on external libraries for some of its functionality. These are automatically installed on usage. To manually install these dependencies, use the following command:
|
30 |
+
|
31 |
+
```bash
|
32 |
+
pip install ultralytics[explorer]
|
33 |
+
```
|
34 |
+
|
35 |
+
!!! tip
|
36 |
+
|
37 |
+
Explorer works on embedding/semantic search & SQL querying and is powered by [LanceDB](https://lancedb.com/) serverless vector database. Unlike traditional in-memory DBs, it is persisted on disk without sacrificing performance, so you can scale locally to large datasets like COCO without running out of memory.
|
38 |
+
|
39 |
+
### Explorer API
|
40 |
+
|
41 |
+
This is a Python API for exploring your datasets. It also powers the GUI Explorer. You can use this to create your own exploratory notebooks or scripts to get insights into your datasets.
|
42 |
+
|
43 |
+
Learn more about the Explorer API [here](api.md).
|
44 |
+
|
45 |
+
## GUI Explorer Usage
|
46 |
+
|
47 |
+
The GUI demo runs in your browser allowing you to create embeddings for your dataset and search for similar images, run SQL queries and perform semantic search. It can be run using the following command:
|
48 |
+
|
49 |
+
```bash
|
50 |
+
yolo explorer
|
51 |
+
```
|
52 |
+
|
53 |
+
!!! note "Note"
|
54 |
+
|
55 |
+
The Ask AI feature works using OpenAI, so you'll be prompted to set the API key for OpenAI when you first run the GUI.
|
56 |
+
You can set it like this - `yolo settings openai_api_key="..."`
|
57 |
+
|
58 |
+
<p>
|
59 |
+
<img width="1709" alt="Ultralytics Explorer OpenAI Integration" src="https://github.com/AyushExel/assets/assets/15766192/1b5f3708-be3e-44c5-9ea3-adcd522dfc75">
|
60 |
+
</p>
|
docs/en/datasets/index.md
ADDED
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore various computer vision datasets supported by Ultralytics for object detection, segmentation, pose estimation, image classification, and multi-object tracking.
|
4 |
+
keywords: computer vision, datasets, Ultralytics, YOLO, object detection, instance segmentation, pose estimation, image classification, multi-object tracking
|
5 |
+
---
|
6 |
+
|
7 |
+
# Datasets Overview
|
8 |
+
|
9 |
+
Ultralytics provides support for various datasets to facilitate computer vision tasks such as detection, instance segmentation, pose estimation, classification, and multi-object tracking. Below is a list of the main Ultralytics datasets, followed by a summary of each computer vision task and the respective datasets.
|
10 |
+
|
11 |
+
<p align="center">
|
12 |
+
<br>
|
13 |
+
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/YDXKa1EljmU"
|
14 |
+
title="YouTube video player" frameborder="0"
|
15 |
+
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
|
16 |
+
allowfullscreen>
|
17 |
+
</iframe>
|
18 |
+
<br>
|
19 |
+
<strong>Watch:</strong> Ultralytics Datasets Overview
|
20 |
+
</p>
|
21 |
+
|
22 |
+
## NEW 🚀 Ultralytics Explorer
|
23 |
+
|
24 |
+
Create embeddings for your dataset, search for similar images, run SQL queries, perform semantic search and even search using natural language! You can get started with our GUI app or build your own using the API. Learn more [here](explorer/index.md).
|
25 |
+
|
26 |
+
<p>
|
27 |
+
<img alt="Ultralytics Explorer Screenshot" src="https://github.com/RizwanMunawar/RizwanMunawar/assets/62513924/d2ebaffd-e065-4d88-983a-33cb6f593785">
|
28 |
+
</p>
|
29 |
+
|
30 |
+
- Try the [GUI Demo](explorer/index.md)
|
31 |
+
- Learn more about the [Explorer API](explorer/index.md)
|
32 |
+
|
33 |
+
## [Detection Datasets](detect/index.md)
|
34 |
+
|
35 |
+
Bounding box object detection is a computer vision technique that involves detecting and localizing objects in an image by drawing a bounding box around each object.
|
36 |
+
|
37 |
+
- [Argoverse](detect/argoverse.md): A dataset containing 3D tracking and motion forecasting data from urban environments with rich annotations.
|
38 |
+
- [COCO](detect/coco.md): A large-scale dataset designed for object detection, segmentation, and captioning with over 200K labeled images.
|
39 |
+
- [COCO8](detect/coco8.md): Contains the first 4 images from COCO train and COCO val, suitable for quick tests.
|
40 |
+
- [Global Wheat 2020](detect/globalwheat2020.md): A dataset of wheat head images collected from around the world for object detection and localization tasks.
|
41 |
+
- [Objects365](detect/objects365.md): A high-quality, large-scale dataset for object detection with 365 object categories and over 600K annotated images.
|
42 |
+
- [OpenImagesV7](detect/open-images-v7.md): A comprehensive dataset by Google with 1.7M train images and 42k validation images.
|
43 |
+
- [SKU-110K](detect/sku-110k.md): A dataset featuring dense object detection in retail environments with over 11K images and 1.7 million bounding boxes.
|
44 |
+
- [VisDrone](detect/visdrone.md): A dataset containing object detection and multi-object tracking data from drone-captured imagery with over 10K images and video sequences.
|
45 |
+
- [VOC](detect/voc.md): The Pascal Visual Object Classes (VOC) dataset for object detection and segmentation with 20 object classes and over 11K images.
|
46 |
+
- [xView](detect/xview.md): A dataset for object detection in overhead imagery with 60 object categories and over 1 million annotated objects.
|
47 |
+
- [Roboflow 100](detect/roboflow-100.md): A diverse object detection benchmark with 100 datasets spanning seven imagery domains for comprehensive model evaluation.
|
48 |
+
- [Brain-tumor](detect/brain-tumor.md): A dataset for detecting brain tumors that includes MRI or CT scan images with details on tumor presence, location, and characteristics. It's vital for training computer vision models to automate tumor identification, aiding in early diagnosis and treatment planning.
|
49 |
+
- [African-wildlife](detect/african-wildlife.md): A dataset featuring images of African wildlife, including buffalo, elephant, rhino, and zebra, that aids in training computer vision models. Essential for identifying animals in various habitats, it supports wildlife research.
|
50 |
+
|
51 |
+
## [Instance Segmentation Datasets](segment/index.md)
|
52 |
+
|
53 |
+
Instance segmentation is a computer vision technique that involves identifying and localizing objects in an image at the pixel level.
|
54 |
+
|
55 |
+
- [COCO](segment/coco.md): A large-scale dataset designed for object detection, segmentation, and captioning tasks with over 200K labeled images.
|
56 |
+
- [COCO8-seg](segment/coco8-seg.md): A smaller dataset for instance segmentation tasks, containing a subset of 8 COCO images with segmentation annotations.
|
57 |
+
- [Crack-seg](segment/crack-seg.md): Specifically crafted dataset for detecting cracks on roads and walls, applicable for both object detection and segmentation tasks.
|
58 |
+
- [Package-seg](segment/package-seg.md): Tailored dataset for identifying packages in warehouses or industrial settings, suitable for both object detection and segmentation applications.
|
59 |
+
- [Carparts-seg](segment/carparts-seg.md): Purpose-built dataset for identifying vehicle parts, catering to design, manufacturing, and research needs. It serves for both object detection and segmentation tasks.
|
60 |
+
|
61 |
+
## [Pose Estimation](pose/index.md)
|
62 |
+
|
63 |
+
Pose estimation is a technique used to determine the pose of the object relative to the camera or the world coordinate system.
|
64 |
+
|
65 |
+
- [COCO](pose/coco.md): A large-scale dataset with human pose annotations designed for pose estimation tasks.
|
66 |
+
- [COCO8-pose](pose/coco8-pose.md): A smaller dataset for pose estimation tasks, containing a subset of 8 COCO images with human pose annotations.
|
67 |
+
- [Tiger-pose](pose/tiger-pose.md): A compact dataset consisting of 263 images focused on tigers, annotated with 12 keypoints per tiger for pose estimation tasks.
|
68 |
+
|
69 |
+
## [Classification](classify/index.md)
|
70 |
+
|
71 |
+
Image classification is a computer vision task that involves categorizing an image into one or more predefined classes or categories based on its visual content.
|
72 |
+
|
73 |
+
- [Caltech 101](classify/caltech101.md): A dataset containing images of 101 object categories for image classification tasks.
|
74 |
+
- [Caltech 256](classify/caltech256.md): An extended version of Caltech 101 with 256 object categories and more challenging images.
|
75 |
+
- [CIFAR-10](classify/cifar10.md): A dataset of 60K 32x32 color images in 10 classes, with 6K images per class.
|
76 |
+
- [CIFAR-100](classify/cifar100.md): An extended version of CIFAR-10 with 100 object categories and 600 images per class.
|
77 |
+
- [Fashion-MNIST](classify/fashion-mnist.md): A dataset consisting of 70,000 grayscale images of 10 fashion categories for image classification tasks.
|
78 |
+
- [ImageNet](classify/imagenet.md): A large-scale dataset for object detection and image classification with over 14 million images and 20,000 categories.
|
79 |
+
- [ImageNet-10](classify/imagenet10.md): A smaller subset of ImageNet with 10 categories for faster experimentation and testing.
|
80 |
+
- [Imagenette](classify/imagenette.md): A smaller subset of ImageNet that contains 10 easily distinguishable classes for quicker training and testing.
|
81 |
+
- [Imagewoof](classify/imagewoof.md): A more challenging subset of ImageNet containing 10 dog breed categories for image classification tasks.
|
82 |
+
- [MNIST](classify/mnist.md): A dataset of 70,000 grayscale images of handwritten digits for image classification tasks.
|
83 |
+
|
84 |
+
## [Oriented Bounding Boxes (OBB)](obb/index.md)
|
85 |
+
|
86 |
+
Oriented Bounding Boxes (OBB) is a method in computer vision for detecting angled objects in images using rotated bounding boxes, often applied to aerial and satellite imagery.
|
87 |
+
|
88 |
+
- [DOTAv2](obb/dota-v2.md): A popular OBB aerial imagery dataset with 1.7 million instances and 11,268 images.
|
89 |
+
|
90 |
+
## [Multi-Object Tracking](track/index.md)
|
91 |
+
|
92 |
+
Multi-object tracking is a computer vision technique that involves detecting and tracking multiple objects over time in a video sequence.
|
93 |
+
|
94 |
+
- [Argoverse](detect/argoverse.md): A dataset containing 3D tracking and motion forecasting data from urban environments with rich annotations for multi-object tracking tasks.
|
95 |
+
- [VisDrone](detect/visdrone.md): A dataset containing object detection and multi-object tracking data from drone-captured imagery with over 10K images and video sequences.
|
96 |
+
|
97 |
+
## Contribute New Datasets
|
98 |
+
|
99 |
+
Contributing a new dataset involves several steps to ensure that it aligns well with the existing infrastructure. Below are the necessary steps:
|
100 |
+
|
101 |
+
### Steps to Contribute a New Dataset
|
102 |
+
|
103 |
+
1. **Collect Images**: Gather the images that belong to the dataset. These could be collected from various sources, such as public databases or your own collection.
|
104 |
+
|
105 |
+
2. **Annotate Images**: Annotate these images with bounding boxes, segments, or keypoints, depending on the task.
|
106 |
+
|
107 |
+
3. **Export Annotations**: Convert these annotations into the YOLO `*.txt` file format which Ultralytics supports.
|
108 |
+
|
109 |
+
4. **Organize Dataset**: Arrange your dataset into the correct folder structure. You should have `train/` and `val/` top-level directories, and within each, an `images/` and `labels/` subdirectory.
|
110 |
+
|
111 |
+
```
|
112 |
+
dataset/
|
113 |
+
├── train/
|
114 |
+
│ ├── images/
|
115 |
+
│ └── labels/
|
116 |
+
└── val/
|
117 |
+
├── images/
|
118 |
+
└── labels/
|
119 |
+
```
|
120 |
+
|
121 |
+
5. **Create a `data.yaml` File**: In your dataset's root directory, create a `data.yaml` file that describes the dataset, classes, and other necessary information.
|
122 |
+
|
123 |
+
6. **Optimize Images (Optional)**: If you want to reduce the size of the dataset for more efficient processing, you can optimize the images using the code below. This is not required, but recommended for smaller dataset sizes and faster download speeds.
|
124 |
+
|
125 |
+
7. **Zip Dataset**: Compress the entire dataset folder into a zip file.
|
126 |
+
|
127 |
+
8. **Document and PR**: Create a documentation page describing your dataset and how it fits into the existing framework. After that, submit a Pull Request (PR). Refer to [Ultralytics Contribution Guidelines](https://docs.ultralytics.com/help/contributing) for more details on how to submit a PR.
|
128 |
+
|
129 |
+
### Example Code to Optimize and Zip a Dataset
|
130 |
+
|
131 |
+
!!! Example "Optimize and Zip a Dataset"
|
132 |
+
|
133 |
+
=== "Python"
|
134 |
+
|
135 |
+
```python
|
136 |
+
from pathlib import Path
|
137 |
+
from ultralytics.data.utils import compress_one_image
|
138 |
+
from ultralytics.utils.downloads import zip_directory
|
139 |
+
|
140 |
+
# Define dataset directory
|
141 |
+
path = Path('path/to/dataset')
|
142 |
+
|
143 |
+
# Optimize images in dataset (optional)
|
144 |
+
for f in path.rglob('*.jpg'):
|
145 |
+
compress_one_image(f)
|
146 |
+
|
147 |
+
# Zip dataset into 'path/to/dataset.zip'
|
148 |
+
zip_directory(path)
|
149 |
+
```
|
150 |
+
|
151 |
+
By following these steps, you can contribute a new dataset that integrates well with Ultralytics' existing structure.
|
docs/en/datasets/obb/dota-v2.md
ADDED
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Delve into DOTA, an Oriented Bounding Box (OBB) aerial imagery dataset with 1.7 million instances and 11,268 images.
|
4 |
+
keywords: DOTA v1, DOTA v1.5, DOTA v2, object detection, aerial images, computer vision, deep learning, annotations, oriented bounding boxes, OBB
|
5 |
+
---
|
6 |
+
|
7 |
+
# DOTA Dataset with OBB
|
8 |
+
|
9 |
+
[DOTA](https://captain-whu.github.io/DOTA/index.html) stands as a specialized dataset, emphasizing object detection in aerial images. Originating from the DOTA series of datasets, it offers annotated images capturing a diverse array of aerial scenes with Oriented Bounding Boxes (OBB).
|
10 |
+
|
11 |
+
![DOTA classes visual](https://user-images.githubusercontent.com/26833433/259461765-72fdd0d8-266b-44a9-8199-199329bf5ca9.jpg)
|
12 |
+
|
13 |
+
## Key Features
|
14 |
+
|
15 |
+
- Collection from various sensors and platforms, with image sizes ranging from 800 × 800 to 20,000 × 20,000 pixels.
|
16 |
+
- Features more than 1.7M Oriented Bounding Boxes across 18 categories.
|
17 |
+
- Encompasses multiscale object detection.
|
18 |
+
- Instances are annotated by experts using arbitrary (8 d.o.f.) quadrilaterals, capturing objects of different scales, orientations, and shapes.
|
19 |
+
|
20 |
+
## Dataset Versions
|
21 |
+
|
22 |
+
### DOTA-v1.0
|
23 |
+
|
24 |
+
- Contains 15 common categories.
|
25 |
+
- Comprises 2,806 images with 188,282 instances.
|
26 |
+
- Split ratios: 1/2 for training, 1/6 for validation, and 1/3 for testing.
|
27 |
+
|
28 |
+
### DOTA-v1.5
|
29 |
+
|
30 |
+
- Incorporates the same images as DOTA-v1.0.
|
31 |
+
- Very small instances (less than 10 pixels) are also annotated.
|
32 |
+
- Addition of a new category: "container crane".
|
33 |
+
- A total of 403,318 instances.
|
34 |
+
- Released for the DOAI Challenge 2019 on Object Detection in Aerial Images.
|
35 |
+
|
36 |
+
### DOTA-v2.0
|
37 |
+
|
38 |
+
- Collections from Google Earth, GF-2 Satellite, and other aerial images.
|
39 |
+
- Contains 18 common categories.
|
40 |
+
- Comprises 11,268 images with a whopping 1,793,658 instances.
|
41 |
+
- New categories introduced: "airport" and "helipad".
|
42 |
+
- Image splits:
|
43 |
+
- Training: 1,830 images with 268,627 instances.
|
44 |
+
- Validation: 593 images with 81,048 instances.
|
45 |
+
- Test-dev: 2,792 images with 353,346 instances.
|
46 |
+
- Test-challenge: 6,053 images with 1,090,637 instances.
|
47 |
+
|
48 |
+
## Dataset Structure
|
49 |
+
|
50 |
+
DOTA exhibits a structured layout tailored for OBB object detection challenges:
|
51 |
+
|
52 |
+
- **Images**: A vast collection of high-resolution aerial images capturing diverse terrains and structures.
|
53 |
+
- **Oriented Bounding Boxes**: Annotations in the form of rotated rectangles encapsulating objects irrespective of their orientation, ideal for capturing objects like airplanes, ships, and buildings.
|
54 |
+
|
55 |
+
## Applications
|
56 |
+
|
57 |
+
DOTA serves as a benchmark for training and evaluating models specifically tailored for aerial image analysis. With the inclusion of OBB annotations, it provides a unique challenge, enabling the development of specialized object detection models that cater to aerial imagery's nuances.
|
58 |
+
|
59 |
+
## Dataset YAML
|
60 |
+
|
61 |
+
Typically, datasets incorporate a YAML (Yet Another Markup Language) file detailing the dataset's configuration. For DOTA v1 and DOTA v1.5, Ultralytics provides `DOTAv1.yaml` and `DOTAv1.5.yaml` files. For additional details on these as well as DOTA v2 please consult DOTA's official repository and documentation.
|
62 |
+
|
63 |
+
!!! Example "DOTAv1.yaml"
|
64 |
+
|
65 |
+
```yaml
|
66 |
+
--8<-- "ultralytics/cfg/datasets/DOTAv1.yaml"
|
67 |
+
```
|
68 |
+
|
69 |
+
## Split DOTA images
|
70 |
+
|
71 |
+
To train on the DOTA dataset, we split the original high-resolution DOTA images into images with 1024x1024 resolution in a multiscale way.
|
72 |
+
|
73 |
+
!!! Example "Split images"
|
74 |
+
|
75 |
+
=== "Python"
|
76 |
+
|
77 |
+
```python
|
78 |
+
from ultralytics.data.split_dota import split_trainval, split_test
|
79 |
+
|
80 |
+
# split train and val set, with labels.
|
81 |
+
split_trainval(
|
82 |
+
data_root='path/to/DOTAv1.0/',
|
83 |
+
save_dir='path/to/DOTAv1.0-split/',
|
84 |
+
rates=[0.5, 1.0, 1.5], # multiscale
|
85 |
+
gap=500
|
86 |
+
)
|
87 |
+
# split test set, without labels.
|
88 |
+
split_test(
|
89 |
+
data_root='path/to/DOTAv1.0/',
|
90 |
+
save_dir='path/to/DOTAv1.0-split/',
|
91 |
+
rates=[0.5, 1.0, 1.5], # multiscale
|
92 |
+
gap=500
|
93 |
+
)
|
94 |
+
```
|
95 |
+
|
96 |
+
## Usage
|
97 |
+
|
98 |
+
To train a model on the DOTA v1 dataset, you can utilize the following code snippets. Always refer to your model's documentation for a thorough list of available arguments.
|
99 |
+
|
100 |
+
!!! Warning
|
101 |
+
|
102 |
+
Please note that all images and associated annotations in the DOTAv1 dataset can be used for academic purposes, but commercial use is prohibited. Your understanding and respect for the dataset creators' wishes are greatly appreciated!
|
103 |
+
|
104 |
+
!!! Example "Train Example"
|
105 |
+
|
106 |
+
=== "Python"
|
107 |
+
|
108 |
+
```python
|
109 |
+
from ultralytics import YOLO
|
110 |
+
|
111 |
+
# Create a new YOLOv8n-OBB model from scratch
|
112 |
+
model = YOLO('yolov8n-obb.yaml')
|
113 |
+
|
114 |
+
# Train the model on the DOTAv1 dataset
|
115 |
+
results = model.train(data='DOTAv1.yaml', epochs=100, imgsz=640)
|
116 |
+
```
|
117 |
+
|
118 |
+
=== "CLI"
|
119 |
+
|
120 |
+
```bash
|
121 |
+
# Train a new YOLOv8n-OBB model on the DOTAv1 dataset
|
122 |
+
yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=640
|
123 |
+
```
|
124 |
+
|
125 |
+
## Sample Data and Annotations
|
126 |
+
|
127 |
+
Having a glance at the dataset illustrates its depth:
|
128 |
+
|
129 |
+
![Dataset sample image](https://captain-whu.github.io/DOTA/images/instances-DOTA.jpg)
|
130 |
+
|
131 |
+
- **DOTA examples**: This snapshot underlines the complexity of aerial scenes and the significance of Oriented Bounding Box annotations, capturing objects in their natural orientation.
|
132 |
+
|
133 |
+
The dataset's richness offers invaluable insights into object detection challenges exclusive to aerial imagery.
|
134 |
+
|
135 |
+
## Citations and Acknowledgments
|
136 |
+
|
137 |
+
For those leveraging DOTA in their endeavors, it's pertinent to cite the relevant research papers:
|
138 |
+
|
139 |
+
!!! Quote ""
|
140 |
+
|
141 |
+
=== "BibTeX"
|
142 |
+
|
143 |
+
```bibtex
|
144 |
+
@article{9560031,
|
145 |
+
author={Ding, Jian and Xue, Nan and Xia, Gui-Song and Bai, Xiang and Yang, Wen and Yang, Michael and Belongie, Serge and Luo, Jiebo and Datcu, Mihai and Pelillo, Marcello and Zhang, Liangpei},
|
146 |
+
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
|
147 |
+
title={Object Detection in Aerial Images: A Large-Scale Benchmark and Challenges},
|
148 |
+
year={2021},
|
149 |
+
volume={},
|
150 |
+
number={},
|
151 |
+
pages={1-1},
|
152 |
+
doi={10.1109/TPAMI.2021.3117983}
|
153 |
+
}
|
154 |
+
```
|
155 |
+
|
156 |
+
A special note of gratitude to the team behind the DOTA datasets for their commendable effort in curating this dataset. For an exhaustive understanding of the dataset and its nuances, please visit the [official DOTA website](https://captain-whu.github.io/DOTA/index.html).
|
docs/en/datasets/obb/dota8.md
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Discover the versatile DOTA8 dataset, perfect for testing and debugging oriented detection models. Learn how to get started with YOLOv8-obb model training.
|
4 |
+
keywords: Ultralytics, YOLOv8, oriented detection, DOTA8 dataset, dataset, model training, YAML
|
5 |
+
---
|
6 |
+
|
7 |
+
# DOTA8 Dataset
|
8 |
+
|
9 |
+
## Introduction
|
10 |
+
|
11 |
+
[Ultralytics](https://ultralytics.com) DOTA8 is a small, but versatile oriented object detection dataset composed of the first 8 images of the split DOTAv1 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging object detection models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets.
|
12 |
+
|
13 |
+
This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com) and [YOLOv8](https://github.com/ultralytics/ultralytics).
|
14 |
+
|
15 |
+
## Dataset YAML
|
16 |
+
|
17 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the DOTA8 dataset, the `dota8.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dota8.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/dota8.yaml).
|
18 |
+
|
19 |
+
!!! Example "ultralytics/cfg/datasets/dota8.yaml"
|
20 |
+
|
21 |
+
```yaml
|
22 |
+
--8<-- "ultralytics/cfg/datasets/dota8.yaml"
|
23 |
+
```
|
24 |
+
|
25 |
+
## Usage
|
26 |
+
|
27 |
+
To train a YOLOv8n-obb model on the DOTA8 dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
28 |
+
|
29 |
+
!!! Example "Train Example"
|
30 |
+
|
31 |
+
=== "Python"
|
32 |
+
|
33 |
+
```python
|
34 |
+
from ultralytics import YOLO
|
35 |
+
|
36 |
+
# Load a model
|
37 |
+
model = YOLO('yolov8n-obb.pt') # load a pretrained model (recommended for training)
|
38 |
+
|
39 |
+
# Train the model
|
40 |
+
results = model.train(data='dota8.yaml', epochs=100, imgsz=640)
|
41 |
+
```
|
42 |
+
|
43 |
+
=== "CLI"
|
44 |
+
|
45 |
+
```bash
|
46 |
+
# Start training from a pretrained *.pt model
|
47 |
+
yolo obb train data=dota8.yaml model=yolov8n-obb.pt epochs=100 imgsz=640
|
48 |
+
```
|
49 |
+
|
50 |
+
## Sample Images and Annotations
|
51 |
+
|
52 |
+
Here are some examples of images from the DOTA8 dataset, along with their corresponding annotations:
|
53 |
+
|
54 |
+
<img src="https://github.com/Laughing-q/assets/assets/61612323/965d3ff7-5b9b-4add-b62e-9795921b60de" alt="Dataset sample image" width="800">
|
55 |
+
|
56 |
+
- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts.
|
57 |
+
|
58 |
+
The example showcases the variety and complexity of the images in the DOTA8 dataset and the benefits of using mosaicing during the training process.
|
59 |
+
|
60 |
+
## Citations and Acknowledgments
|
61 |
+
|
62 |
+
If you use the DOTA dataset in your research or development work, please cite the following paper:
|
63 |
+
|
64 |
+
!!! Quote ""
|
65 |
+
|
66 |
+
=== "BibTeX"
|
67 |
+
|
68 |
+
```bibtex
|
69 |
+
@article{9560031,
|
70 |
+
author={Ding, Jian and Xue, Nan and Xia, Gui-Song and Bai, Xiang and Yang, Wen and Yang, Michael and Belongie, Serge and Luo, Jiebo and Datcu, Mihai and Pelillo, Marcello and Zhang, Liangpei},
|
71 |
+
journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
|
72 |
+
title={Object Detection in Aerial Images: A Large-Scale Benchmark and Challenges},
|
73 |
+
year={2021},
|
74 |
+
volume={},
|
75 |
+
number={},
|
76 |
+
pages={1-1},
|
77 |
+
doi={10.1109/TPAMI.2021.3117983}
|
78 |
+
}
|
79 |
+
```
|
80 |
+
|
81 |
+
A special note of gratitude to the team behind the DOTA datasets for their commendable effort in curating this dataset. For an exhaustive understanding of the dataset and its nuances, please visit the [official DOTA website](https://captain-whu.github.io/DOTA/index.html).
|
docs/en/datasets/obb/index.md
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Dive deep into various oriented bounding box (OBB) dataset formats compatible with Ultralytics YOLO models. Grasp the nuances of using and converting datasets to this format.
|
4 |
+
keywords: Ultralytics, YOLO, oriented bounding boxes, OBB, dataset formats, label formats, DOTA v2, data conversion
|
5 |
+
---
|
6 |
+
|
7 |
+
# Oriented Bounding Box (OBB) Datasets Overview
|
8 |
+
|
9 |
+
Training a precise object detection model with oriented bounding boxes (OBB) requires a thorough dataset. This guide explains the various OBB dataset formats compatible with Ultralytics YOLO models, offering insights into their structure, application, and methods for format conversions.
|
10 |
+
|
11 |
+
## Supported OBB Dataset Formats
|
12 |
+
|
13 |
+
### YOLO OBB Format
|
14 |
+
|
15 |
+
The YOLO OBB format designates bounding boxes by their four corner points with coordinates normalized between 0 and 1. It follows this format:
|
16 |
+
|
17 |
+
```bash
|
18 |
+
class_index x1 y1 x2 y2 x3 y3 x4 y4
|
19 |
+
```
|
20 |
+
|
21 |
+
Internally, YOLO processes losses and outputs in the `xywhr` format, which represents the bounding box's center point (xy), width, height, and rotation.
|
22 |
+
|
23 |
+
<p align="center"><img width="800" src="https://user-images.githubusercontent.com/26833433/259471881-59020fe2-09a4-4dcc-acce-9b0f7cfa40ee.png" alt="OBB format examples"></p>
|
24 |
+
|
25 |
+
An example of a `*.txt` label file for the above image, which contains an object of class `0` in OBB format, could look like:
|
26 |
+
|
27 |
+
```bash
|
28 |
+
0 0.780811 0.743961 0.782371 0.74686 0.777691 0.752174 0.776131 0.749758
|
29 |
+
```
|
30 |
+
|
31 |
+
## Usage
|
32 |
+
|
33 |
+
To train a model using these OBB formats:
|
34 |
+
|
35 |
+
!!! Example
|
36 |
+
|
37 |
+
=== "Python"
|
38 |
+
|
39 |
+
```python
|
40 |
+
from ultralytics import YOLO
|
41 |
+
|
42 |
+
# Create a new YOLOv8n-OBB model from scratch
|
43 |
+
model = YOLO('yolov8n-obb.yaml')
|
44 |
+
|
45 |
+
# Train the model on the DOTAv1 dataset
|
46 |
+
results = model.train(data='DOTAv1.yaml', epochs=100, imgsz=640)
|
47 |
+
```
|
48 |
+
|
49 |
+
=== "CLI"
|
50 |
+
|
51 |
+
```bash
|
52 |
+
# Train a new YOLOv8n-OBB model on the DOTAv1 dataset
|
53 |
+
yolo obb train data=DOTAv1.yaml model=yolov8n-obb.pt epochs=100 imgsz=640
|
54 |
+
```
|
55 |
+
|
56 |
+
## Supported Datasets
|
57 |
+
|
58 |
+
Currently, the following datasets with Oriented Bounding Boxes are supported:
|
59 |
+
|
60 |
+
- [**DOTA v2**](dota-v2.md): DOTA (A Large-scale Dataset for Object Detection in Aerial Images) version 2, emphasizes detection from aerial perspectives and contains oriented bounding boxes with 1.7 million instances and 11,268 images.
|
61 |
+
|
62 |
+
- [**DOTA8**](dota8.md): A small, 8-image subset of the full DOTA dataset suitable for testing workflows and Continuous Integration (CI) checks of OBB training in the `ultralytics` repository.
|
63 |
+
|
64 |
+
### Incorporating your own OBB dataset
|
65 |
+
|
66 |
+
For those looking to introduce their own datasets with oriented bounding boxes, ensure compatibility with the "YOLO OBB format" mentioned above. Convert your annotations to this required format and detail the paths, classes, and class names in a corresponding YAML configuration file.
|
67 |
+
|
68 |
+
## Convert Label Formats
|
69 |
+
|
70 |
+
### DOTA Dataset Format to YOLO OBB Format
|
71 |
+
|
72 |
+
Transitioning labels from the DOTA dataset format to the YOLO OBB format can be achieved with this script:
|
73 |
+
|
74 |
+
!!! Example
|
75 |
+
|
76 |
+
=== "Python"
|
77 |
+
|
78 |
+
```python
|
79 |
+
from ultralytics.data.converter import convert_dota_to_yolo_obb
|
80 |
+
|
81 |
+
convert_dota_to_yolo_obb('path/to/DOTA')
|
82 |
+
```
|
83 |
+
|
84 |
+
This conversion mechanism is instrumental for datasets in the DOTA format, ensuring alignment with the Ultralytics YOLO OBB format.
|
85 |
+
|
86 |
+
It's imperative to validate the compatibility of the dataset with your model and adhere to the necessary format conventions. Properly structured datasets are pivotal for training efficient object detection models with oriented bounding boxes.
|
docs/en/datasets/pose/coco.md
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Detailed guide on the special COCO-Pose Dataset in Ultralytics. Learn about its key features, structure, and usage in pose estimation tasks with YOLO.
|
4 |
+
keywords: Ultralytics YOLO, COCO-Pose Dataset, Deep Learning, Pose Estimation, Training Models, Dataset YAML, openpose, YOLO
|
5 |
+
---
|
6 |
+
|
7 |
+
# COCO-Pose Dataset
|
8 |
+
|
9 |
+
The [COCO-Pose](https://cocodataset.org/#keypoints-2017) dataset is a specialized version of the COCO (Common Objects in Context) dataset, designed for pose estimation tasks. It leverages the COCO Keypoints 2017 images and labels to enable the training of models like YOLO for pose estimation tasks.
|
10 |
+
|
11 |
+
![Pose sample image](https://user-images.githubusercontent.com/26833433/277141128-cd62d09e-1eb0-4d20-9938-c55239a5cb76.jpg)
|
12 |
+
|
13 |
+
## Key Features
|
14 |
+
|
15 |
+
- COCO-Pose builds upon the COCO Keypoints 2017 dataset which contains 200K images labeled with keypoints for pose estimation tasks.
|
16 |
+
- The dataset supports 17 keypoints for human figures, facilitating detailed pose estimation.
|
17 |
+
- Like COCO, it provides standardized evaluation metrics, including Object Keypoint Similarity (OKS) for pose estimation tasks, making it suitable for comparing model performance.
|
18 |
+
|
19 |
+
## Dataset Structure
|
20 |
+
|
21 |
+
The COCO-Pose dataset is split into three subsets:
|
22 |
+
|
23 |
+
1. **Train2017**: This subset contains a portion of the 118K images from the COCO dataset, annotated for training pose estimation models.
|
24 |
+
2. **Val2017**: This subset has a selection of images used for validation purposes during model training.
|
25 |
+
3. **Test2017**: This subset consists of images used for testing and benchmarking the trained models. Ground truth annotations for this subset are not publicly available, and the results are submitted to the [COCO evaluation server](https://codalab.lisn.upsaclay.fr/competitions/7384) for performance evaluation.
|
26 |
+
|
27 |
+
## Applications
|
28 |
+
|
29 |
+
The COCO-Pose dataset is specifically used for training and evaluating deep learning models in keypoint detection and pose estimation tasks, such as OpenPose. The dataset's large number of annotated images and standardized evaluation metrics make it an essential resource for computer vision researchers and practitioners focused on pose estimation.
|
30 |
+
|
31 |
+
## Dataset YAML
|
32 |
+
|
33 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the COCO-Pose dataset, the `coco-pose.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco-pose.yaml).
|
34 |
+
|
35 |
+
!!! Example "ultralytics/cfg/datasets/coco-pose.yaml"
|
36 |
+
|
37 |
+
```yaml
|
38 |
+
--8<-- "ultralytics/cfg/datasets/coco-pose.yaml"
|
39 |
+
```
|
40 |
+
|
41 |
+
## Usage
|
42 |
+
|
43 |
+
To train a YOLOv8n-pose model on the COCO-Pose dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
44 |
+
|
45 |
+
!!! Example "Train Example"
|
46 |
+
|
47 |
+
=== "Python"
|
48 |
+
|
49 |
+
```python
|
50 |
+
from ultralytics import YOLO
|
51 |
+
|
52 |
+
# Load a model
|
53 |
+
model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training)
|
54 |
+
|
55 |
+
# Train the model
|
56 |
+
results = model.train(data='coco-pose.yaml', epochs=100, imgsz=640)
|
57 |
+
```
|
58 |
+
|
59 |
+
=== "CLI"
|
60 |
+
|
61 |
+
```bash
|
62 |
+
# Start training from a pretrained *.pt model
|
63 |
+
yolo pose train data=coco-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
|
64 |
+
```
|
65 |
+
|
66 |
+
## Sample Images and Annotations
|
67 |
+
|
68 |
+
The COCO-Pose dataset contains a diverse set of images with human figures annotated with keypoints. Here are some examples of images from the dataset, along with their corresponding annotations:
|
69 |
+
|
70 |
+
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239690150-a9dc0bd0-7ad9-4b78-a30f-189ed727ea0e.jpg)
|
71 |
+
|
72 |
+
- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts.
|
73 |
+
|
74 |
+
The example showcases the variety and complexity of the images in the COCO-Pose dataset and the benefits of using mosaicing during the training process.
|
75 |
+
|
76 |
+
## Citations and Acknowledgments
|
77 |
+
|
78 |
+
If you use the COCO-Pose dataset in your research or development work, please cite the following paper:
|
79 |
+
|
80 |
+
!!! Quote ""
|
81 |
+
|
82 |
+
=== "BibTeX"
|
83 |
+
|
84 |
+
```bibtex
|
85 |
+
@misc{lin2015microsoft,
|
86 |
+
title={Microsoft COCO: Common Objects in Context},
|
87 |
+
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
|
88 |
+
year={2015},
|
89 |
+
eprint={1405.0312},
|
90 |
+
archivePrefix={arXiv},
|
91 |
+
primaryClass={cs.CV}
|
92 |
+
}
|
93 |
+
```
|
94 |
+
|
95 |
+
We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO-Pose dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).
|
docs/en/datasets/pose/coco8-pose.md
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Discover the versatile COCO8-Pose dataset, perfect for testing and debugging pose detection models. Learn how to get started with YOLOv8-pose model training.
|
4 |
+
keywords: Ultralytics, YOLOv8, pose detection, COCO8-Pose dataset, dataset, model training, YAML
|
5 |
+
---
|
6 |
+
|
7 |
+
# COCO8-Pose Dataset
|
8 |
+
|
9 |
+
## Introduction
|
10 |
+
|
11 |
+
[Ultralytics](https://ultralytics.com) COCO8-Pose is a small, but versatile pose detection dataset composed of the first 8 images of the COCO train 2017 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging object detection models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets.
|
12 |
+
|
13 |
+
This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com) and [YOLOv8](https://github.com/ultralytics/ultralytics).
|
14 |
+
|
15 |
+
## Dataset YAML
|
16 |
+
|
17 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the COCO8-Pose dataset, the `coco8-pose.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8-pose.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8-pose.yaml).
|
18 |
+
|
19 |
+
!!! Example "ultralytics/cfg/datasets/coco8-pose.yaml"
|
20 |
+
|
21 |
+
```yaml
|
22 |
+
--8<-- "ultralytics/cfg/datasets/coco8-pose.yaml"
|
23 |
+
```
|
24 |
+
|
25 |
+
## Usage
|
26 |
+
|
27 |
+
To train a YOLOv8n-pose model on the COCO8-Pose dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
28 |
+
|
29 |
+
!!! Example "Train Example"
|
30 |
+
|
31 |
+
=== "Python"
|
32 |
+
|
33 |
+
```python
|
34 |
+
from ultralytics import YOLO
|
35 |
+
|
36 |
+
# Load a model
|
37 |
+
model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training)
|
38 |
+
|
39 |
+
# Train the model
|
40 |
+
results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640)
|
41 |
+
```
|
42 |
+
|
43 |
+
=== "CLI"
|
44 |
+
|
45 |
+
```bash
|
46 |
+
# Start training from a pretrained *.pt model
|
47 |
+
yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
|
48 |
+
```
|
49 |
+
|
50 |
+
## Sample Images and Annotations
|
51 |
+
|
52 |
+
Here are some examples of images from the COCO8-Pose dataset, along with their corresponding annotations:
|
53 |
+
|
54 |
+
<img src="https://user-images.githubusercontent.com/26833433/236818283-52eecb96-fc6a-420d-8a26-d488b352dd4c.jpg" alt="Dataset sample image" width="800">
|
55 |
+
|
56 |
+
- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts.
|
57 |
+
|
58 |
+
The example showcases the variety and complexity of the images in the COCO8-Pose dataset and the benefits of using mosaicing during the training process.
|
59 |
+
|
60 |
+
## Citations and Acknowledgments
|
61 |
+
|
62 |
+
If you use the COCO dataset in your research or development work, please cite the following paper:
|
63 |
+
|
64 |
+
!!! Quote ""
|
65 |
+
|
66 |
+
=== "BibTeX"
|
67 |
+
|
68 |
+
```bibtex
|
69 |
+
@misc{lin2015microsoft,
|
70 |
+
title={Microsoft COCO: Common Objects in Context},
|
71 |
+
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
|
72 |
+
year={2015},
|
73 |
+
eprint={1405.0312},
|
74 |
+
archivePrefix={arXiv},
|
75 |
+
primaryClass={cs.CV}
|
76 |
+
}
|
77 |
+
```
|
78 |
+
|
79 |
+
We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).
|
docs/en/datasets/pose/index.md
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Understand the YOLO pose dataset format and learn to use Ultralytics datasets to train your pose estimation models effectively.
|
4 |
+
keywords: Ultralytics, YOLO, pose estimation, datasets, training, YAML, keypoints, COCO-Pose, COCO8-Pose, data conversion
|
5 |
+
---
|
6 |
+
|
7 |
+
# Pose Estimation Datasets Overview
|
8 |
+
|
9 |
+
## Supported Dataset Formats
|
10 |
+
|
11 |
+
### Ultralytics YOLO format
|
12 |
+
|
13 |
+
The dataset label format used for training YOLO pose models is as follows:
|
14 |
+
|
15 |
+
1. One text file per image: Each image in the dataset has a corresponding text file with the same name as the image file and the ".txt" extension.
|
16 |
+
2. One row per object: Each row in the text file corresponds to one object instance in the image.
|
17 |
+
3. Object information per row: Each row contains the following information about the object instance:
|
18 |
+
- Object class index: An integer representing the class of the object (e.g., 0 for person, 1 for car, etc.).
|
19 |
+
- Object center coordinates: The x and y coordinates of the center of the object, normalized to be between 0 and 1.
|
20 |
+
- Object width and height: The width and height of the object, normalized to be between 0 and 1.
|
21 |
+
- Object keypoint coordinates: The keypoints of the object, normalized to be between 0 and 1.
|
22 |
+
|
23 |
+
Here is an example of the label format for pose estimation task:
|
24 |
+
|
25 |
+
Format with Dim = 2
|
26 |
+
|
27 |
+
```
|
28 |
+
<class-index> <x> <y> <width> <height> <px1> <py1> <px2> <py2> ... <pxn> <pyn>
|
29 |
+
```
|
30 |
+
|
31 |
+
Format with Dim = 3
|
32 |
+
|
33 |
+
```
|
34 |
+
<class-index> <x> <y> <width> <height> <px1> <py1> <p1-visibility> <px2> <py2> <p2-visibility> ... <pxn> <pyn> <pn-visibility>
|
35 |
+
```
|
36 |
+
|
37 |
+
In this format, `<class-index>` is the index of the class for the object, `<x> <y> <width> <height>` are the normalized center coordinates, width, and height of the bounding box, and `<px1> <py1> <px2> <py2> ... <pxn> <pyn>` are the normalized coordinates of the keypoints. The values are separated by spaces.
|
38 |
+
|
39 |
+
### Dataset YAML format
|
40 |
+
|
41 |
+
The Ultralytics framework uses a YAML file format to define the dataset and model configuration for training pose estimation models. Here is an example of the YAML format used for defining a pose dataset:
|
42 |
+
|
43 |
+
```yaml
|
44 |
+
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
45 |
+
path: ../datasets/coco8-pose # dataset root dir
|
46 |
+
train: images/train # train images (relative to 'path') 4 images
|
47 |
+
val: images/val # val images (relative to 'path') 4 images
|
48 |
+
test: # test images (optional)
|
49 |
+
|
50 |
+
# Keypoints
|
51 |
+
kpt_shape: [17, 3] # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
|
52 |
+
flip_idx: [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15]
|
53 |
+
|
54 |
+
# Classes dictionary
|
55 |
+
names:
|
56 |
+
0: person
|
57 |
+
```
|
58 |
+
|
59 |
+
The `train` and `val` fields specify the paths to the directories containing the training and validation images, respectively.
|
60 |
+
|
61 |
+
`names` is a dictionary of class names. The order of the names should match the order of the object class indices in the YOLO dataset files.
|
62 |
+
|
63 |
+
(Optional) If the keypoints are symmetric, such as the left and right sides of a human body or face, then `flip_idx` is required. For example, if we assume five facial landmark keypoints: [left eye, right eye, nose, left mouth, right mouth], and the original index is [0, 1, 2, 3, 4], then flip_idx is [1, 0, 2, 4, 3] (just exchange the left-right indices, i.e. 0-1 and 3-4, and do not modify others like nose in this example).
|
64 |
+
|
65 |
+
## Usage
|
66 |
+
|
67 |
+
!!! Example
|
68 |
+
|
69 |
+
=== "Python"
|
70 |
+
|
71 |
+
```python
|
72 |
+
from ultralytics import YOLO
|
73 |
+
|
74 |
+
# Load a model
|
75 |
+
model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training)
|
76 |
+
|
77 |
+
# Train the model
|
78 |
+
results = model.train(data='coco8-pose.yaml', epochs=100, imgsz=640)
|
79 |
+
```
|
80 |
+
=== "CLI"
|
81 |
+
|
82 |
+
```bash
|
83 |
+
# Start training from a pretrained *.pt model
|
84 |
+
yolo pose train data=coco8-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
|
85 |
+
```
|
86 |
+
|
87 |
+
## Supported Datasets
|
88 |
+
|
89 |
+
This section outlines the datasets that are compatible with Ultralytics YOLO format and can be used for training pose estimation models:
|
90 |
+
|
91 |
+
### COCO-Pose
|
92 |
+
|
93 |
+
- **Description**: COCO-Pose is a large-scale object detection, segmentation, and pose estimation dataset. It is a subset of the popular COCO dataset and focuses on human pose estimation. COCO-Pose includes multiple keypoints for each human instance.
|
94 |
+
- **Label Format**: Same as Ultralytics YOLO format as described above, with keypoints for human poses.
|
95 |
+
- **Number of Classes**: 1 (Human).
|
96 |
+
- **Keypoints**: 17 keypoints including nose, eyes, ears, shoulders, elbows, wrists, hips, knees, and ankles.
|
97 |
+
- **Usage**: Suitable for training human pose estimation models.
|
98 |
+
- **Additional Notes**: The dataset is rich and diverse, containing over 200k labeled images.
|
99 |
+
- [Read more about COCO-Pose](coco.md)
|
100 |
+
|
101 |
+
### COCO8-Pose
|
102 |
+
|
103 |
+
- **Description**: [Ultralytics](https://ultralytics.com) COCO8-Pose is a small, but versatile pose detection dataset composed of the first 8 images of the COCO train 2017 set, 4 for training and 4 for validation.
|
104 |
+
- **Label Format**: Same as Ultralytics YOLO format as described above, with keypoints for human poses.
|
105 |
+
- **Number of Classes**: 1 (Human).
|
106 |
+
- **Keypoints**: 17 keypoints including nose, eyes, ears, shoulders, elbows, wrists, hips, knees, and ankles.
|
107 |
+
- **Usage**: Suitable for testing and debugging object detection models, or for experimenting with new detection approaches.
|
108 |
+
- **Additional Notes**: COCO8-Pose is ideal for sanity checks and CI checks.
|
109 |
+
- [Read more about COCO8-Pose](coco8-pose.md)
|
110 |
+
|
111 |
+
### Tiger-Pose
|
112 |
+
|
113 |
+
- **Description**: The [Ultralytics](https://ultralytics.com) Tiger-Pose animal pose dataset comprises 263 images sourced from a [YouTube Video](https://www.youtube.com/watch?v=MIBAT6BGE6U&pp=ygUbVGlnZXIgd2Fsa2luZyByZWZlcmVuY2UubXA0), with 210 images allocated for training and 53 for validation.
|
114 |
+
- **Label Format**: Same as Ultralytics YOLO format as described above, with 12 keypoints for animal pose and no visible dimension.
|
115 |
+
- **Number of Classes**: 1 (Tiger).
|
116 |
+
- **Keypoints**: 12 keypoints.
|
117 |
+
- **Usage**: Great for animal pose or any other pose that is not human-based.
|
118 |
+
- [Read more about Tiger-Pose](tiger-pose.md)
|
119 |
+
|
120 |
+
### Adding your own dataset
|
121 |
+
|
122 |
+
If you have your own dataset and would like to use it for training pose estimation models with Ultralytics YOLO format, ensure that it follows the format specified above under "Ultralytics YOLO format". Convert your annotations to the required format and specify the paths, number of classes, and class names in the YAML configuration file.
|
123 |
+
|
124 |
+
### Conversion Tool
|
125 |
+
|
126 |
+
Ultralytics provides a convenient conversion tool to convert labels from the popular COCO dataset format to YOLO format:
|
127 |
+
|
128 |
+
!!! Example
|
129 |
+
|
130 |
+
=== "Python"
|
131 |
+
|
132 |
+
```python
|
133 |
+
from ultralytics.data.converter import convert_coco
|
134 |
+
|
135 |
+
convert_coco(labels_dir='path/to/coco/annotations/', use_keypoints=True)
|
136 |
+
```
|
137 |
+
|
138 |
+
This conversion tool can be used to convert the COCO dataset or any dataset in the COCO format to the Ultralytics YOLO format. The `use_keypoints` parameter specifies whether to include keypoints (for pose estimation) in the converted labels.
|
docs/en/datasets/pose/tiger-pose.md
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Discover the versatile Tiger-Pose dataset, perfect for testing and debugging pose detection models. Learn how to get started with YOLOv8-pose model training.
|
4 |
+
keywords: Ultralytics, YOLOv8, pose detection, Tiger-Pose dataset, dataset, model training, YAML
|
5 |
+
---
|
6 |
+
|
7 |
+
# Tiger-Pose Dataset
|
8 |
+
|
9 |
+
## Introduction
|
10 |
+
|
11 |
+
[Ultralytics](https://ultralytics.com) introduces the Tiger-Pose dataset, a versatile collection designed for pose estimation tasks. This dataset comprises 263 images sourced from a [YouTube Video](https://www.youtube.com/watch?v=MIBAT6BGE6U&pp=ygUbVGlnZXIgd2Fsa2luZyByZWZlcmVuY2UubXA0), with 210 images allocated for training and 53 for validation. It serves as an excellent resource for testing and troubleshooting pose estimation algorithms.
|
12 |
+
|
13 |
+
Despite its manageable size of 210 training images, the Tiger-Pose dataset offers diversity, making it suitable for assessing training pipelines, identifying potential errors, and serving as a valuable preliminary step before working with larger datasets for pose estimation.
|
14 |
+
|
15 |
+
This dataset is intended for use with [Ultralytics HUB](https://hub.ultralytics.com) and [YOLOv8](https://github.com/ultralytics/ultralytics).
|
16 |
+
|
17 |
+
<p align="center">
|
18 |
+
<br>
|
19 |
+
<iframe loading="lazy" width="720" height="405" src="https://www.youtube.com/embed/Gc6K5eKrTNQ"
|
20 |
+
title="YouTube video player" frameborder="0"
|
21 |
+
allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
|
22 |
+
allowfullscreen>
|
23 |
+
</iframe>
|
24 |
+
<br>
|
25 |
+
<strong>Watch:</strong> Train YOLOv8 Pose Model on Tiger-Pose Dataset Using Ultralytics HUB
|
26 |
+
</p>
|
27 |
+
|
28 |
+
## Dataset YAML
|
29 |
+
|
30 |
+
A YAML (Yet Another Markup Language) file serves as the means to specify the configuration details of a dataset. It encompasses crucial data such as file paths, class definitions, and other pertinent information. Specifically, for the `tiger-pose.yaml` file, you can check [Ultralytics Tiger-Pose Dataset Configuration File](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/tiger-pose.yaml).
|
31 |
+
|
32 |
+
!!! Example "ultralytics/cfg/datasets/tiger-pose.yaml"
|
33 |
+
|
34 |
+
```yaml
|
35 |
+
--8<-- "ultralytics/cfg/datasets/tiger-pose.yaml"
|
36 |
+
```
|
37 |
+
|
38 |
+
## Usage
|
39 |
+
|
40 |
+
To train a YOLOv8n-pose model on the Tiger-Pose dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
41 |
+
|
42 |
+
!!! Example "Train Example"
|
43 |
+
|
44 |
+
=== "Python"
|
45 |
+
|
46 |
+
```python
|
47 |
+
from ultralytics import YOLO
|
48 |
+
|
49 |
+
# Load a model
|
50 |
+
model = YOLO('yolov8n-pose.pt') # load a pretrained model (recommended for training)
|
51 |
+
|
52 |
+
# Train the model
|
53 |
+
results = model.train(data='tiger-pose.yaml', epochs=100, imgsz=640)
|
54 |
+
```
|
55 |
+
|
56 |
+
=== "CLI"
|
57 |
+
|
58 |
+
```bash
|
59 |
+
# Start training from a pretrained *.pt model
|
60 |
+
yolo task=pose mode=train data=tiger-pose.yaml model=yolov8n-pose.pt epochs=100 imgsz=640
|
61 |
+
```
|
62 |
+
|
63 |
+
## Sample Images and Annotations
|
64 |
+
|
65 |
+
Here are some examples of images from the Tiger-Pose dataset, along with their corresponding annotations:
|
66 |
+
|
67 |
+
<img src="https://user-images.githubusercontent.com/62513924/272491921-c963d2bf-505f-4a15-abd7-259de302cffa.jpg" alt="Dataset sample image" width="100%">
|
68 |
+
|
69 |
+
- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts.
|
70 |
+
|
71 |
+
The example showcases the variety and complexity of the images in the Tiger-Pose dataset and the benefits of using mosaicing during the training process.
|
72 |
+
|
73 |
+
## Inference Example
|
74 |
+
|
75 |
+
!!! Example "Inference Example"
|
76 |
+
|
77 |
+
=== "Python"
|
78 |
+
|
79 |
+
```python
|
80 |
+
from ultralytics import YOLO
|
81 |
+
|
82 |
+
# Load a model
|
83 |
+
model = YOLO('path/to/best.pt') # load a tiger-pose trained model
|
84 |
+
|
85 |
+
# Run inference
|
86 |
+
results = model.predict(source="https://www.youtube.com/watch?v=MIBAT6BGE6U&pp=ygUYdGlnZXIgd2Fsa2luZyByZWZlcmVuY2Ug", show=True)
|
87 |
+
```
|
88 |
+
|
89 |
+
=== "CLI"
|
90 |
+
|
91 |
+
```bash
|
92 |
+
# Run inference using a tiger-pose trained model
|
93 |
+
yolo task=pose mode=predict source="https://www.youtube.com/watch?v=MIBAT6BGE6U&pp=ygUYdGlnZXIgd2Fsa2luZyByZWZlcmVuY2Ug" show=True model="path/to/best.pt"
|
94 |
+
```
|
95 |
+
|
96 |
+
## Citations and Acknowledgments
|
97 |
+
|
98 |
+
The dataset has been released under the [AGPL-3.0 License](https://github.com/ultralytics/ultralytics/blob/main/LICENSE).
|
docs/en/datasets/segment/carparts-seg.md
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore the Carparts Segmentation Dataset with Ultralytics YOLOv8, a large-scale benchmark for vehicle maintenance, and learn how to train a YOLO model using it.
|
4 |
+
keywords: CarParts Segmentation Dataset, Ultralytics, Vehicle Analytics, Spare parts Detection, YOLO model, object detection, object tracking
|
5 |
+
---
|
6 |
+
|
7 |
+
# Roboflow Universe Carparts Segmentation Dataset
|
8 |
+
|
9 |
+
The [Roboflow](https://roboflow.com/?ref=ultralytics) [Carparts Segmentation Dataset](https://universe.roboflow.com/gianmarco-russo-vt9xr/car-seg-un1pm) is a curated collection of images and videos designed for computer vision applications, specifically focusing on segmentation tasks related to car parts. This dataset provides a diverse set of visuals captured from multiple perspectives, offering valuable annotated examples for training and testing segmentation models.
|
10 |
+
|
11 |
+
Whether you're working on automotive research, developing AI solutions for vehicle maintenance, or exploring computer vision applications, the Carparts Segmentation Dataset serves as a valuable resource for enhancing accuracy and efficiency in your projects.
|
12 |
+
|
13 |
+
## Dataset Structure
|
14 |
+
|
15 |
+
The data distribution within the Carparts Segmentation Dataset is organized as outlined below:
|
16 |
+
|
17 |
+
- **Training set**: Includes 3156 images, each accompanied by its corresponding annotations.
|
18 |
+
- **Testing set**: Comprises 276 images, with each one paired with its respective annotations.
|
19 |
+
- **Validation set**: Consists of 401 images, each having corresponding annotations.
|
20 |
+
|
21 |
+
## Applications
|
22 |
+
|
23 |
+
Carparts Segmentation finds applications in automotive quality control, auto repair, e-commerce cataloging, traffic monitoring, autonomous vehicles, insurance processing, recycling, and smart city initiatives. It streamlines processes by accurately identifying and categorizing different vehicle components, contributing to efficiency and automation in various industries.
|
24 |
+
|
25 |
+
## Dataset YAML
|
26 |
+
|
27 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the Carparts Segmentation dataset, the `carparts-seg.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/carparts-seg.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/carparts-seg.yaml).
|
28 |
+
|
29 |
+
!!! Example "ultralytics/cfg/datasets/carparts-seg.yaml"
|
30 |
+
|
31 |
+
```yaml
|
32 |
+
--8<-- "ultralytics/cfg/datasets/carparts-seg.yaml"
|
33 |
+
```
|
34 |
+
|
35 |
+
## Usage
|
36 |
+
|
37 |
+
To train an Ultralytics YOLOv8n-seg model on the Carparts Segmentation dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
38 |
+
|
39 |
+
!!! Example "Train Example"
|
40 |
+
|
41 |
+
=== "Python"
|
42 |
+
|
43 |
+
```python
|
44 |
+
from ultralytics import YOLO
|
45 |
+
|
46 |
+
# Load a model
|
47 |
+
model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training)
|
48 |
+
|
49 |
+
# Train the model
|
50 |
+
results = model.train(data='carparts-seg.yaml', epochs=100, imgsz=640)
|
51 |
+
```
|
52 |
+
|
53 |
+
=== "CLI"
|
54 |
+
|
55 |
+
```bash
|
56 |
+
# Start training from a pretrained *.pt model
|
57 |
+
yolo segment train data=carparts-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
|
58 |
+
```
|
59 |
+
|
60 |
+
## Sample Data and Annotations
|
61 |
+
|
62 |
+
The Carparts Segmentation dataset includes a diverse array of images and videos taken from various perspectives. Below, you'll find examples of data from the dataset along with their corresponding annotations:
|
63 |
+
|
64 |
+
![Dataset sample image](https://github.com/RizwanMunawar/RizwanMunawar/assets/62513924/55da8284-a637-4858-aa1c-fc22d33a9c43)
|
65 |
+
|
66 |
+
- This image illustrates object segmentation within a sample, featuring annotated bounding boxes with masks surrounding identified objects. The dataset consists of a varied set of images captured in various locations, environments, and densities, serving as a comprehensive resource for crafting models specific to this task.
|
67 |
+
- This instance highlights the diversity and complexity inherent in the dataset, emphasizing the crucial role of high-quality data in computer vision tasks, particularly in the realm of car parts segmentation.
|
68 |
+
|
69 |
+
## Citations and Acknowledgments
|
70 |
+
|
71 |
+
If you integrate the Carparts Segmentation dataset into your research or development projects, please make reference to the following paper:
|
72 |
+
|
73 |
+
!!! Quote ""
|
74 |
+
|
75 |
+
=== "BibTeX"
|
76 |
+
```bibtex
|
77 |
+
@misc{ car-seg-un1pm_dataset,
|
78 |
+
title = { car-seg Dataset },
|
79 |
+
type = { Open Source Dataset },
|
80 |
+
author = { Gianmarco Russo },
|
81 |
+
howpublished = { \url{ https://universe.roboflow.com/gianmarco-russo-vt9xr/car-seg-un1pm } },
|
82 |
+
url = { https://universe.roboflow.com/gianmarco-russo-vt9xr/car-seg-un1pm },
|
83 |
+
journal = { Roboflow Universe },
|
84 |
+
publisher = { Roboflow },
|
85 |
+
year = { 2023 },
|
86 |
+
month = { nov },
|
87 |
+
note = { visited on 2024-01-24 },
|
88 |
+
}
|
89 |
+
```
|
90 |
+
|
91 |
+
We extend our thanks to the Roboflow team for their dedication in developing and managing the Carparts Segmentation dataset, a valuable resource for vehicle maintenance and research projects. For additional details about the Carparts Segmentation dataset and its creators, please visit the [CarParts Segmentation Dataset Page](https://universe.roboflow.com/gianmarco-russo-vt9xr/car-seg-un1pm).
|
docs/en/datasets/segment/coco.md
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore the possibilities of the COCO-Seg dataset, designed for object instance segmentation and YOLO model training. Discover key features, dataset structure, applications, and usage.
|
4 |
+
keywords: Ultralytics, YOLO, COCO-Seg, dataset, instance segmentation, model training, deep learning, computer vision
|
5 |
+
---
|
6 |
+
|
7 |
+
# COCO-Seg Dataset
|
8 |
+
|
9 |
+
The [COCO-Seg](https://cocodataset.org/#home) dataset, an extension of the COCO (Common Objects in Context) dataset, is specially designed to aid research in object instance segmentation. It uses the same images as COCO but introduces more detailed segmentation annotations. This dataset is a crucial resource for researchers and developers working on instance segmentation tasks, especially for training YOLO models.
|
10 |
+
|
11 |
+
## Key Features
|
12 |
+
|
13 |
+
- COCO-Seg retains the original 330K images from COCO.
|
14 |
+
- The dataset consists of the same 80 object categories found in the original COCO dataset.
|
15 |
+
- Annotations now include more detailed instance segmentation masks for each object in the images.
|
16 |
+
- COCO-Seg provides standardized evaluation metrics like mean Average Precision (mAP) for object detection, and mean Average Recall (mAR) for instance segmentation tasks, enabling effective comparison of model performance.
|
17 |
+
|
18 |
+
## Dataset Structure
|
19 |
+
|
20 |
+
The COCO-Seg dataset is partitioned into three subsets:
|
21 |
+
|
22 |
+
1. **Train2017**: This subset contains 118K images for training instance segmentation models.
|
23 |
+
2. **Val2017**: This subset includes 5K images used for validation purposes during model training.
|
24 |
+
3. **Test2017**: This subset encompasses 20K images used for testing and benchmarking the trained models. Ground truth annotations for this subset are not publicly available, and the results are submitted to the [COCO evaluation server](https://codalab.lisn.upsaclay.fr/competitions/7383) for performance evaluation.
|
25 |
+
|
26 |
+
## Applications
|
27 |
+
|
28 |
+
COCO-Seg is widely used for training and evaluating deep learning models in instance segmentation, such as the YOLO models. The large number of annotated images, the diversity of object categories, and the standardized evaluation metrics make it an indispensable resource for computer vision researchers and practitioners.
|
29 |
+
|
30 |
+
## Dataset YAML
|
31 |
+
|
32 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the COCO-Seg dataset, the `coco.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco.yaml).
|
33 |
+
|
34 |
+
!!! Example "ultralytics/cfg/datasets/coco.yaml"
|
35 |
+
|
36 |
+
```yaml
|
37 |
+
--8<-- "ultralytics/cfg/datasets/coco.yaml"
|
38 |
+
```
|
39 |
+
|
40 |
+
## Usage
|
41 |
+
|
42 |
+
To train a YOLOv8n-seg model on the COCO-Seg dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
43 |
+
|
44 |
+
!!! Example "Train Example"
|
45 |
+
|
46 |
+
=== "Python"
|
47 |
+
|
48 |
+
```python
|
49 |
+
from ultralytics import YOLO
|
50 |
+
|
51 |
+
# Load a model
|
52 |
+
model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training)
|
53 |
+
|
54 |
+
# Train the model
|
55 |
+
results = model.train(data='coco-seg.yaml', epochs=100, imgsz=640)
|
56 |
+
```
|
57 |
+
|
58 |
+
=== "CLI"
|
59 |
+
|
60 |
+
```bash
|
61 |
+
# Start training from a pretrained *.pt model
|
62 |
+
yolo segment train data=coco-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
|
63 |
+
```
|
64 |
+
|
65 |
+
## Sample Images and Annotations
|
66 |
+
|
67 |
+
COCO-Seg, like its predecessor COCO, contains a diverse set of images with various object categories and complex scenes. However, COCO-Seg introduces more detailed instance segmentation masks for each object in the images. Here are some examples of images from the dataset, along with their corresponding instance segmentation masks:
|
68 |
+
|
69 |
+
![Dataset sample image](https://user-images.githubusercontent.com/26833433/239690696-93fa8765-47a2-4b34-a6e5-516d0d1c725b.jpg)
|
70 |
+
|
71 |
+
- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This aids the model's ability to generalize to different object sizes, aspect ratios, and contexts.
|
72 |
+
|
73 |
+
The example showcases the variety and complexity of the images in the COCO-Seg dataset and the benefits of using mosaicing during the training process.
|
74 |
+
|
75 |
+
## Citations and Acknowledgments
|
76 |
+
|
77 |
+
If you use the COCO-Seg dataset in your research or development work, please cite the original COCO paper and acknowledge the extension to COCO-Seg:
|
78 |
+
|
79 |
+
!!! Quote ""
|
80 |
+
|
81 |
+
=== "BibTeX"
|
82 |
+
|
83 |
+
```bibtex
|
84 |
+
@misc{lin2015microsoft,
|
85 |
+
title={Microsoft COCO: Common Objects in Context},
|
86 |
+
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
|
87 |
+
year={2015},
|
88 |
+
eprint={1405.0312},
|
89 |
+
archivePrefix={arXiv},
|
90 |
+
primaryClass={cs.CV}
|
91 |
+
}
|
92 |
+
```
|
93 |
+
|
94 |
+
We extend our thanks to the COCO Consortium for creating and maintaining this invaluable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).
|
docs/en/datasets/segment/coco8-seg.md
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: 'Discover the COCO8-Seg: a compact but versatile instance segmentation dataset ideal for testing Ultralytics YOLOv8 detection approaches. Complete usage guide included.'
|
4 |
+
keywords: COCO8-Seg dataset, Ultralytics, YOLOv8, instance segmentation, dataset configuration, YAML, YOLOv8n-seg model, mosaiced dataset images
|
5 |
+
---
|
6 |
+
|
7 |
+
# COCO8-Seg Dataset
|
8 |
+
|
9 |
+
## Introduction
|
10 |
+
|
11 |
+
[Ultralytics](https://ultralytics.com) COCO8-Seg is a small, but versatile instance segmentation dataset composed of the first 8 images of the COCO train 2017 set, 4 for training and 4 for validation. This dataset is ideal for testing and debugging segmentation models, or for experimenting with new detection approaches. With 8 images, it is small enough to be easily manageable, yet diverse enough to test training pipelines for errors and act as a sanity check before training larger datasets.
|
12 |
+
|
13 |
+
This dataset is intended for use with Ultralytics [HUB](https://hub.ultralytics.com) and [YOLOv8](https://github.com/ultralytics/ultralytics).
|
14 |
+
|
15 |
+
## Dataset YAML
|
16 |
+
|
17 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the COCO8-Seg dataset, the `coco8-seg.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8-seg.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/coco8-seg.yaml).
|
18 |
+
|
19 |
+
!!! Example "ultralytics/cfg/datasets/coco8-seg.yaml"
|
20 |
+
|
21 |
+
```yaml
|
22 |
+
--8<-- "ultralytics/cfg/datasets/coco8-seg.yaml"
|
23 |
+
```
|
24 |
+
|
25 |
+
## Usage
|
26 |
+
|
27 |
+
To train a YOLOv8n-seg model on the COCO8-Seg dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
28 |
+
|
29 |
+
!!! Example "Train Example"
|
30 |
+
|
31 |
+
=== "Python"
|
32 |
+
|
33 |
+
```python
|
34 |
+
from ultralytics import YOLO
|
35 |
+
|
36 |
+
# Load a model
|
37 |
+
model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training)
|
38 |
+
|
39 |
+
# Train the model
|
40 |
+
results = model.train(data='coco8-seg.yaml', epochs=100, imgsz=640)
|
41 |
+
```
|
42 |
+
|
43 |
+
=== "CLI"
|
44 |
+
|
45 |
+
```bash
|
46 |
+
# Start training from a pretrained *.pt model
|
47 |
+
yolo segment train data=coco8-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
|
48 |
+
```
|
49 |
+
|
50 |
+
## Sample Images and Annotations
|
51 |
+
|
52 |
+
Here are some examples of images from the COCO8-Seg dataset, along with their corresponding annotations:
|
53 |
+
|
54 |
+
<img src="https://user-images.githubusercontent.com/26833433/236818387-f7bde7df-caaa-46d1-8341-1f7504cd11a1.jpg" alt="Dataset sample image" width="800">
|
55 |
+
|
56 |
+
- **Mosaiced Image**: This image demonstrates a training batch composed of mosaiced dataset images. Mosaicing is a technique used during training that combines multiple images into a single image to increase the variety of objects and scenes within each training batch. This helps improve the model's ability to generalize to different object sizes, aspect ratios, and contexts.
|
57 |
+
|
58 |
+
The example showcases the variety and complexity of the images in the COCO8-Seg dataset and the benefits of using mosaicing during the training process.
|
59 |
+
|
60 |
+
## Citations and Acknowledgments
|
61 |
+
|
62 |
+
If you use the COCO dataset in your research or development work, please cite the following paper:
|
63 |
+
|
64 |
+
!!! Quote ""
|
65 |
+
|
66 |
+
=== "BibTeX"
|
67 |
+
|
68 |
+
```bibtex
|
69 |
+
@misc{lin2015microsoft,
|
70 |
+
title={Microsoft COCO: Common Objects in Context},
|
71 |
+
author={Tsung-Yi Lin and Michael Maire and Serge Belongie and Lubomir Bourdev and Ross Girshick and James Hays and Pietro Perona and Deva Ramanan and C. Lawrence Zitnick and Piotr Dollár},
|
72 |
+
year={2015},
|
73 |
+
eprint={1405.0312},
|
74 |
+
archivePrefix={arXiv},
|
75 |
+
primaryClass={cs.CV}
|
76 |
+
}
|
77 |
+
```
|
78 |
+
|
79 |
+
We would like to acknowledge the COCO Consortium for creating and maintaining this valuable resource for the computer vision community. For more information about the COCO dataset and its creators, visit the [COCO dataset website](https://cocodataset.org/#home).
|
docs/en/datasets/segment/crack-seg.md
ADDED
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore the Crack Segmentation Dataset with Ultralytics YOLOv8, a large-scale benchmark for road safety analysis, and learn how to train a YOLO model using it.
|
4 |
+
keywords: Crack Segmentation Dataset, Ultralytics, road cracks monitoring, YOLO model, object detection, object tracking, road safety
|
5 |
+
---
|
6 |
+
|
7 |
+
# Roboflow Universe Crack Segmentation Dataset
|
8 |
+
|
9 |
+
The [Roboflow](https://roboflow.com/?ref=ultralytics) [Crack Segmentation Dataset](https://universe.roboflow.com/university-bswxt/crack-bphdr) stands out as an extensive resource designed specifically for individuals involved in transportation and public safety studies. It is equally beneficial for those working on the development of self-driving car models or simply exploring computer vision applications for recreational purposes.
|
10 |
+
|
11 |
+
Comprising a total of 4029 static images captured from diverse road and wall scenarios, this dataset emerges as a valuable asset for tasks related to crack segmentation. Whether you are delving into the intricacies of transportation research or seeking to enhance the accuracy of your self-driving car models, this dataset provides a rich and varied collection of images to support your endeavors.
|
12 |
+
|
13 |
+
## Dataset Structure
|
14 |
+
|
15 |
+
The division of data within the Crack Segmentation Dataset is outlined as follows:
|
16 |
+
|
17 |
+
- **Training set**: Consists of 3717 images with corresponding annotations.
|
18 |
+
- **Testing set**: Comprises 112 images along with their respective annotations.
|
19 |
+
- **Validation set**: Includes 200 images with their corresponding annotations.
|
20 |
+
|
21 |
+
## Applications
|
22 |
+
|
23 |
+
Crack segmentation finds practical applications in infrastructure maintenance, aiding in the identification and assessment of structural damage. It also plays a crucial role in enhancing road safety by enabling automated systems to detect and address pavement cracks for timely repairs.
|
24 |
+
|
25 |
+
## Dataset YAML
|
26 |
+
|
27 |
+
A YAML (Yet Another Markup Language) file is employed to outline the configuration of the dataset, encompassing details about paths, classes, and other pertinent information. Specifically, for the Crack Segmentation dataset, the `crack-seg.yaml` file is managed and accessible at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/crack-seg.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/crack-seg.yaml).
|
28 |
+
|
29 |
+
!!! Example "ultralytics/cfg/datasets/crack-seg.yaml"
|
30 |
+
|
31 |
+
```yaml
|
32 |
+
--8<-- "ultralytics/cfg/datasets/crack-seg.yaml"
|
33 |
+
```
|
34 |
+
|
35 |
+
## Usage
|
36 |
+
|
37 |
+
To train an Ultralytics YOLOv8n-seg model on the Crack Segmentation dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
38 |
+
|
39 |
+
!!! Example "Train Example"
|
40 |
+
|
41 |
+
=== "Python"
|
42 |
+
|
43 |
+
```python
|
44 |
+
from ultralytics import YOLO
|
45 |
+
|
46 |
+
# Load a model
|
47 |
+
model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training)
|
48 |
+
|
49 |
+
# Train the model
|
50 |
+
results = model.train(data='crack-seg.yaml', epochs=100, imgsz=640)
|
51 |
+
```
|
52 |
+
|
53 |
+
=== "CLI"
|
54 |
+
|
55 |
+
```bash
|
56 |
+
# Start training from a pretrained *.pt model
|
57 |
+
yolo segment train data=crack-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
|
58 |
+
```
|
59 |
+
|
60 |
+
## Sample Data and Annotations
|
61 |
+
|
62 |
+
The Crack Segmentation dataset comprises a varied collection of images and videos captured from multiple perspectives. Below are instances of data from the dataset, accompanied by their respective annotations:
|
63 |
+
|
64 |
+
![Dataset sample image](https://github.com/RizwanMunawar/RizwanMunawar/assets/62513924/40ccc20a-9593-412f-b028-643d4a904d0e)
|
65 |
+
|
66 |
+
- This image presents an example of image object segmentation, featuring annotated bounding boxes with masks outlining identified objects. The dataset includes a diverse array of images taken in different locations, environments, and densities, making it a comprehensive resource for developing models designed for this particular task.
|
67 |
+
|
68 |
+
- The example underscores the diversity and complexity found in the Crack segmentation dataset, emphasizing the crucial role of high-quality data in computer vision tasks.
|
69 |
+
|
70 |
+
## Citations and Acknowledgments
|
71 |
+
|
72 |
+
If you incorporate the crack segmentation dataset into your research or development endeavors, kindly reference the following paper:
|
73 |
+
|
74 |
+
!!! Quote ""
|
75 |
+
|
76 |
+
=== "BibTeX"
|
77 |
+
|
78 |
+
```bibtex
|
79 |
+
@misc{ crack-bphdr_dataset,
|
80 |
+
title = { crack Dataset },
|
81 |
+
type = { Open Source Dataset },
|
82 |
+
author = { University },
|
83 |
+
howpublished = { \url{ https://universe.roboflow.com/university-bswxt/crack-bphdr } },
|
84 |
+
url = { https://universe.roboflow.com/university-bswxt/crack-bphdr },
|
85 |
+
journal = { Roboflow Universe },
|
86 |
+
publisher = { Roboflow },
|
87 |
+
year = { 2022 },
|
88 |
+
month = { dec },
|
89 |
+
note = { visited on 2024-01-23 },
|
90 |
+
}
|
91 |
+
```
|
92 |
+
|
93 |
+
We would like to acknowledge the Roboflow team for creating and maintaining the Crack Segmentation dataset as a valuable resource for road safety and research projects. For more information about the Crack Segmentation dataset and its creators, visit the [Crack Segmentation Dataset Page](https://universe.roboflow.com/university-bswxt/crack-bphdr).
|
docs/en/datasets/segment/index.md
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Learn how Ultralytics YOLO supports various dataset formats for instance segmentation. This guide includes information on data conversions, auto-annotations, and dataset usage.
|
4 |
+
keywords: Ultralytics, YOLO, Instance Segmentation, Dataset, YAML, COCO, Auto-Annotation, Image Segmentation
|
5 |
+
---
|
6 |
+
|
7 |
+
# Instance Segmentation Datasets Overview
|
8 |
+
|
9 |
+
## Supported Dataset Formats
|
10 |
+
|
11 |
+
### Ultralytics YOLO format
|
12 |
+
|
13 |
+
The dataset label format used for training YOLO segmentation models is as follows:
|
14 |
+
|
15 |
+
1. One text file per image: Each image in the dataset has a corresponding text file with the same name as the image file and the ".txt" extension.
|
16 |
+
2. One row per object: Each row in the text file corresponds to one object instance in the image.
|
17 |
+
3. Object information per row: Each row contains the following information about the object instance:
|
18 |
+
- Object class index: An integer representing the class of the object (e.g., 0 for person, 1 for car, etc.).
|
19 |
+
- Object bounding coordinates: The bounding coordinates around the mask area, normalized to be between 0 and 1.
|
20 |
+
|
21 |
+
The format for a single row in the segmentation dataset file is as follows:
|
22 |
+
|
23 |
+
```
|
24 |
+
<class-index> <x1> <y1> <x2> <y2> ... <xn> <yn>
|
25 |
+
```
|
26 |
+
|
27 |
+
In this format, `<class-index>` is the index of the class for the object, and `<x1> <y1> <x2> <y2> ... <xn> <yn>` are the bounding coordinates of the object's segmentation mask. The coordinates are separated by spaces.
|
28 |
+
|
29 |
+
Here is an example of the YOLO dataset format for a single image with two objects made up of a 3-point segment and a 5-point segment.
|
30 |
+
|
31 |
+
```
|
32 |
+
0 0.681 0.485 0.670 0.487 0.676 0.487
|
33 |
+
1 0.504 0.000 0.501 0.004 0.498 0.004 0.493 0.010 0.492 0.0104
|
34 |
+
```
|
35 |
+
|
36 |
+
!!! Tip "Tip"
|
37 |
+
|
38 |
+
- The length of each row does **not** have to be equal.
|
39 |
+
- Each segmentation label must have a **minimum of 3 xy points**: `<class-index> <x1> <y1> <x2> <y2> <x3> <y3>`
|
40 |
+
|
41 |
+
### Dataset YAML format
|
42 |
+
|
43 |
+
The Ultralytics framework uses a YAML file format to define the dataset and model configuration for training segmentation models. Here is an example of the YAML format used for defining a segmentation dataset:
|
44 |
+
|
45 |
+
```yaml
|
46 |
+
# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
|
47 |
+
path: ../datasets/coco8-seg # dataset root dir
|
48 |
+
train: images/train # train images (relative to 'path') 4 images
|
49 |
+
val: images/val # val images (relative to 'path') 4 images
|
50 |
+
test: # test images (optional)
|
51 |
+
|
52 |
+
# Classes (80 COCO classes)
|
53 |
+
names:
|
54 |
+
0: person
|
55 |
+
1: bicycle
|
56 |
+
2: car
|
57 |
+
# ...
|
58 |
+
77: teddy bear
|
59 |
+
78: hair drier
|
60 |
+
79: toothbrush
|
61 |
+
```
|
62 |
+
|
63 |
+
The `train` and `val` fields specify the paths to the directories containing the training and validation images, respectively.
|
64 |
+
|
65 |
+
`names` is a dictionary of class names. The order of the names should match the order of the object class indices in the YOLO dataset files.
|
66 |
+
|
67 |
+
## Usage
|
68 |
+
|
69 |
+
!!! Example
|
70 |
+
|
71 |
+
=== "Python"
|
72 |
+
|
73 |
+
```python
|
74 |
+
from ultralytics import YOLO
|
75 |
+
|
76 |
+
# Load a model
|
77 |
+
model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training)
|
78 |
+
|
79 |
+
# Train the model
|
80 |
+
results = model.train(data='coco128-seg.yaml', epochs=100, imgsz=640)
|
81 |
+
```
|
82 |
+
=== "CLI"
|
83 |
+
|
84 |
+
```bash
|
85 |
+
# Start training from a pretrained *.pt model
|
86 |
+
yolo segment train data=coco128-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
|
87 |
+
```
|
88 |
+
|
89 |
+
## Supported Datasets
|
90 |
+
|
91 |
+
Ultralytics YOLO supports the following datasets for instance segmentation tasks:
|
92 |
+
|
93 |
+
- [COCO](coco.md): A comprehensive dataset for object detection, segmentation, and captioning, featuring over 200K labeled images across a wide range of categories.
|
94 |
+
|
95 |
+
- [COCO8-seg](coco8-seg.md): A compact, 8-image subset of COCO designed for quick testing of segmentation model training, ideal for CI checks and workflow validation in the `ultralytics` repository.
|
96 |
+
|
97 |
+
- [Carparts-seg](carparts-seg.md): A specialized dataset focused on the segmentation of car parts, ideal for automotive applications. It includes a variety of vehicles with detailed annotations of individual car components.
|
98 |
+
|
99 |
+
- [Crack-seg](crack-seg.md): A dataset tailored for the segmentation of cracks in various surfaces. Essential for infrastructure maintenance and quality control, it provides detailed imagery for training models to identify structural weaknesses.
|
100 |
+
|
101 |
+
- [Package-seg](package-seg.md): A dataset dedicated to the segmentation of different types of packaging materials and shapes. It's particularly useful for logistics and warehouse automation, aiding in the development of systems for package handling and sorting.
|
102 |
+
|
103 |
+
### Adding your own dataset
|
104 |
+
|
105 |
+
If you have your own dataset and would like to use it for training segmentation models with Ultralytics YOLO format, ensure that it follows the format specified above under "Ultralytics YOLO format". Convert your annotations to the required format and specify the paths, number of classes, and class names in the YAML configuration file.
|
106 |
+
|
107 |
+
## Port or Convert Label Formats
|
108 |
+
|
109 |
+
### COCO Dataset Format to YOLO Format
|
110 |
+
|
111 |
+
You can easily convert labels from the popular COCO dataset format to the YOLO format using the following code snippet:
|
112 |
+
|
113 |
+
!!! Example
|
114 |
+
|
115 |
+
=== "Python"
|
116 |
+
|
117 |
+
```python
|
118 |
+
from ultralytics.data.converter import convert_coco
|
119 |
+
|
120 |
+
convert_coco(labels_dir='path/to/coco/annotations/', use_segments=True)
|
121 |
+
```
|
122 |
+
|
123 |
+
This conversion tool can be used to convert the COCO dataset or any dataset in the COCO format to the Ultralytics YOLO format.
|
124 |
+
|
125 |
+
Remember to double-check if the dataset you want to use is compatible with your model and follows the necessary format conventions. Properly formatted datasets are crucial for training successful segmentation models.
|
126 |
+
|
127 |
+
## Auto-Annotation
|
128 |
+
|
129 |
+
Auto-annotation is an essential feature that allows you to generate a segmentation dataset using a pre-trained detection model. It enables you to quickly and accurately annotate a large number of images without the need for manual labeling, saving time and effort.
|
130 |
+
|
131 |
+
### Generate Segmentation Dataset Using a Detection Model
|
132 |
+
|
133 |
+
To auto-annotate your dataset using the Ultralytics framework, you can use the `auto_annotate` function as shown below:
|
134 |
+
|
135 |
+
!!! Example
|
136 |
+
|
137 |
+
=== "Python"
|
138 |
+
|
139 |
+
```python
|
140 |
+
from ultralytics.data.annotator import auto_annotate
|
141 |
+
|
142 |
+
auto_annotate(data="path/to/images", det_model="yolov8x.pt", sam_model='sam_b.pt')
|
143 |
+
```
|
144 |
+
|
145 |
+
The `auto_annotate` function accepts the following arguments:
|
146 |
+
|
147 |
+
| Argument | Type | Description | Default |
|
148 |
+
|--------------|-------------------------|-------------------------------------------------------------------------------------------------------------|----------------|
|
149 |
+
| `data` | `str` | Path to a folder containing images to be annotated. | `None` |
|
150 |
+
| `det_model` | `str, optional` | Pre-trained YOLO detection model. Defaults to `'yolov8x.pt'`. | `'yolov8x.pt'` |
|
151 |
+
| `sam_model` | `str, optional` | Pre-trained SAM segmentation model. Defaults to `'sam_b.pt'`. | `'sam_b.pt'` |
|
152 |
+
| `device` | `str, optional` | Device to run the models on. Defaults to an empty string (CPU or GPU, if available). | `''` |
|
153 |
+
| `output_dir` | `str or None, optional` | Directory to save the annotated results. Defaults to a `'labels'` folder in the same directory as `'data'`. | `None` |
|
154 |
+
|
155 |
+
The `auto_annotate` function takes the path to your images, along with optional arguments for specifying the pre-trained detection and [SAM segmentation models](../../models/sam.md), the device to run the models on, and the output directory for saving the annotated results.
|
156 |
+
|
157 |
+
By leveraging the power of pre-trained models, auto-annotation can significantly reduce the time and effort required for creating high-quality segmentation datasets. This feature is particularly useful for researchers and developers working with large image collections, as it allows them to focus on model development and evaluation rather than manual annotation.
|
docs/en/datasets/segment/package-seg.md
ADDED
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Explore the Package Segmentation Dataset, a large-scale benchmark for logistics, and learn how to train an Ultralytics YOLOv8 model using it.
|
4 |
+
keywords: Package Segmentation Dataset, Ultralytics, Manufacturing, Logistics, YOLO model, object detection, object tracking
|
5 |
+
---
|
6 |
+
|
7 |
+
# Roboflow Universe Package Segmentation Dataset
|
8 |
+
|
9 |
+
The [Roboflow](https://roboflow.com/?ref=ultralytics) [Package Segmentation Dataset](https://universe.roboflow.com/factorypackage/factory_package) is a curated collection of images specifically tailored for tasks related to package segmentation in the field of computer vision. This dataset is designed to assist researchers, developers, and enthusiasts working on projects related to package identification, sorting, and handling.
|
10 |
+
|
11 |
+
Containing a diverse set of images showcasing various packages in different contexts and environments, the dataset serves as a valuable resource for training and evaluating segmentation models. Whether you are engaged in logistics, warehouse automation, or any application requiring precise package analysis, the Package Segmentation Dataset provides a targeted and comprehensive set of images to enhance the performance of your computer vision algorithms.
|
12 |
+
|
13 |
+
## Dataset Structure
|
14 |
+
|
15 |
+
The distribution of data in the Package Segmentation Dataset is structured as follows:
|
16 |
+
|
17 |
+
- **Training set**: Encompasses 1920 images accompanied by their corresponding annotations.
|
18 |
+
- **Testing set**: Consists of 89 images, each paired with its respective annotations.
|
19 |
+
- **Validation set**: Comprises 188 images, each with corresponding annotations.
|
20 |
+
|
21 |
+
## Applications
|
22 |
+
|
23 |
+
Package segmentation, facilitated by the Package Segmentation Dataset, is crucial for optimizing logistics, enhancing last-mile delivery, improving manufacturing quality control, and contributing to smart city solutions. From e-commerce to security applications, this dataset is a key resource, fostering innovation in computer vision for diverse and efficient package analysis applications.
|
24 |
+
|
25 |
+
## Dataset YAML
|
26 |
+
|
27 |
+
A YAML (Yet Another Markup Language) file is used to define the dataset configuration. It contains information about the dataset's paths, classes, and other relevant information. In the case of the Package Segmentation dataset, the `package-seg.yaml` file is maintained at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/package-seg.yaml](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/cfg/datasets/package-seg.yaml).
|
28 |
+
|
29 |
+
!!! Example "ultralytics/cfg/datasets/package-seg.yaml"
|
30 |
+
|
31 |
+
```yaml
|
32 |
+
--8<-- "ultralytics/cfg/datasets/package-seg.yaml"
|
33 |
+
```
|
34 |
+
|
35 |
+
## Usage
|
36 |
+
|
37 |
+
To train an Ultralytics YOLOv8n model on the Package Segmentation dataset for 100 epochs with an image size of 640, you can use the following code snippets. For a comprehensive list of available arguments, refer to the model [Training](../../modes/train.md) page.
|
38 |
+
|
39 |
+
!!! Example "Train Example"
|
40 |
+
|
41 |
+
=== "Python"
|
42 |
+
|
43 |
+
```python
|
44 |
+
from ultralytics import YOLO
|
45 |
+
|
46 |
+
# Load a model
|
47 |
+
model = YOLO('yolov8n-seg.pt') # load a pretrained model (recommended for training)
|
48 |
+
|
49 |
+
# Train the model
|
50 |
+
results = model.train(data='package-seg.yaml', epochs=100, imgsz=640)
|
51 |
+
```
|
52 |
+
|
53 |
+
=== "CLI"
|
54 |
+
|
55 |
+
```bash
|
56 |
+
# Start training from a pretrained *.pt model
|
57 |
+
yolo segment train data=package-seg.yaml model=yolov8n-seg.pt epochs=100 imgsz=640
|
58 |
+
```
|
59 |
+
|
60 |
+
## Sample Data and Annotations
|
61 |
+
|
62 |
+
The Package Segmentation dataset comprises a varied collection of images and videos captured from multiple perspectives. Below are instances of data from the dataset, accompanied by their respective annotations:
|
63 |
+
|
64 |
+
![Dataset sample image](https://github.com/RizwanMunawar/RizwanMunawar/assets/62513924/55bdf5c8-4ae4-4824-8d08-63c15bdd9a92)
|
65 |
+
|
66 |
+
- This image displays an instance of image object detection, featuring annotated bounding boxes with masks outlining recognized objects. The dataset incorporates a diverse collection of images taken in different locations, environments, and densities. It serves as a comprehensive resource for developing models specific to this task.
|
67 |
+
- The example emphasizes the diversity and complexity present in the Package Segmentation dataset, underscoring the significance of high-quality image data for computer vision tasks involving package handling.
|
68 |
+
|
69 |
+
## Citations and Acknowledgments
|
70 |
+
|
71 |
+
If you integrate the package segmentation dataset into your research or development initiatives, please cite the following source:
|
72 |
+
|
73 |
+
!!! Quote ""
|
74 |
+
|
75 |
+
=== "BibTeX"
|
76 |
+
|
77 |
+
```bibtex
|
78 |
+
@misc{ factory_package_dataset,
|
79 |
+
title = { factory_package Dataset },
|
80 |
+
type = { Open Source Dataset },
|
81 |
+
author = { factorypackage },
|
82 |
+
howpublished = { \url{ https://universe.roboflow.com/factorypackage/factory_package } },
|
83 |
+
url = { https://universe.roboflow.com/factorypackage/factory_package },
|
84 |
+
journal = { Roboflow Universe },
|
85 |
+
publisher = { Roboflow },
|
86 |
+
year = { 2024 },
|
87 |
+
month = { jan },
|
88 |
+
note = { visited on 2024-01-24 },
|
89 |
+
}
|
90 |
+
```
|
91 |
+
|
92 |
+
We express our gratitude to the Roboflow team for their efforts in creating and maintaining the Package Segmentation dataset, a valuable asset for logistics and research projects. For additional details about the Package Segmentation dataset and its creators, please visit the [Package Segmentation Dataset Page](https://universe.roboflow.com/factorypackage/factory_package).
|
docs/en/datasets/track/index.md
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
comments: true
|
3 |
+
description: Understand multi-object tracking datasets, upcoming features and how to use them with YOLO in Python and CLI. Dive in now!
|
4 |
+
keywords: Ultralytics, YOLO, multi-object tracking, datasets, detection, segmentation, pose models, Python, CLI
|
5 |
+
---
|
6 |
+
|
7 |
+
# Multi-object Tracking Datasets Overview
|
8 |
+
|
9 |
+
## Dataset Format (Coming Soon)
|
10 |
+
|
11 |
+
The multi-object tracker doesn't need standalone training and directly supports pre-trained detection, segmentation, or pose models. Support for training trackers alone is coming soon.
|
12 |
+
|
13 |
+
## Usage
|
14 |
+
|
15 |
+
!!! Example
|
16 |
+
|
17 |
+
=== "Python"
|
18 |
+
|
19 |
+
```python
|
20 |
+
from ultralytics import YOLO
|
21 |
+
|
22 |
+
model = YOLO('yolov8n.pt')
|
23 |
+
results = model.track(source="https://youtu.be/LNwODJXcvt4", conf=0.3, iou=0.5, show=True)
|
24 |
+
```
|
25 |
+
=== "CLI"
|
26 |
+
|
27 |
+
```bash
|
28 |
+
yolo track model=yolov8n.pt source="https://youtu.be/LNwODJXcvt4" conf=0.3, iou=0.5 show
|
29 |
+
```
|
docs/mkdocs_github_authors.yaml
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
1185102784@qq.com: Laughing-q
|
2 |
+
1579093407@qq.com: null
|
3 |
+
17216799+ouphi@users.noreply.github.com: ouphi
|
4 |
+
17316848+maianumerosky@users.noreply.github.com: maianumerosky
|
5 |
+
34196005+fcakyon@users.noreply.github.com: fcakyon
|
6 |
+
37276661+capjamesg@users.noreply.github.com: capjamesg
|
7 |
+
39910262+ChaoningZhang@users.noreply.github.com: ChaoningZhang
|
8 |
+
40165666+berry-ding@users.noreply.github.com: berry-ding
|
9 |
+
47978446+sergiuwaxmann@users.noreply.github.com: sergiuwaxmann
|
10 |
+
61612323+Laughing-q@users.noreply.github.com: Laughing-q
|
11 |
+
62214284+Burhan-Q@users.noreply.github.com: Burhan-Q
|
12 |
+
75611662+tensorturtle@users.noreply.github.com: tensorturtle
|
13 |
+
78843978+Skillnoob@users.noreply.github.com: Skillnoob
|
14 |
+
79740115+0xSynapse@users.noreply.github.com: 0xSynapse
|
15 |
+
abirami.vina@gmail.com: abirami-vina
|
16 |
+
ayush.chaurarsia@gmail.com: AyushExel
|
17 |
+
chr043416@gmail.com: RizwanMunawar
|
18 |
+
glenn.jocher@ultralytics.com: glenn-jocher
|
19 |
+
muhammadrizwanmunawar123@gmail.com: RizwanMunawar
|
20 |
+
not.committed.yet: null
|
21 |
+
priytosh.revolution@live.com: priytosh-tripathi
|
22 |
+
shuizhuyuanluo@126.com: null
|
23 |
+
xinwang614@gmail.com: GreatV
|
tests/conftest.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
2 |
+
|
3 |
+
import shutil
|
4 |
+
from pathlib import Path
|
5 |
+
|
6 |
+
# Scratch directory for files produced by the tests; lives next to this conftest as "tmp"
TMP = Path(__file__).resolve().parent.joinpath("tmp")
|
7 |
+
|
8 |
+
|
9 |
+
def pytest_addoption(parser):
    """
    Register the custom ``--slow`` command-line flag with pytest.

    Args:
        parser (pytest.config.Parser): The pytest parser object that receives the new option.
    """
    # Boolean switch: absent -> False, present -> True
    slow_flag = dict(action="store_true", default=False, help="Run slow tests")
    parser.addoption("--slow", **slow_flag)
|
17 |
+
|
18 |
+
|
19 |
+
def pytest_collection_modifyitems(config, items):
    """
    Drop tests marked as slow from the collected items unless the --slow option was given.

    Args:
        config (pytest.config.Config): The pytest config object, queried for the ``--slow`` option.
        items (list): Collected test items; filtered in place.
    """
    if config.getoption("--slow"):
        return  # slow tests were requested, keep the collection as-is

    kept = []
    for item in items:
        if "slow" not in item.keywords:
            kept.append(item)
    # Slice assignment mutates the caller's list object instead of rebinding a local name
    items[:] = kept
|
30 |
+
|
31 |
+
|
32 |
+
def pytest_sessionstart(session):
    """
    Prepare the test session: seed initialization and a fresh temporary directory.

    Pytest invokes this hook once the 'Session' object exists and before test collection begins.

    Args:
        session (pytest.Session): The pytest session object (not used by this hook).
    """
    from ultralytics.utils import torch_utils

    torch_utils.init_seeds()

    # Discard whatever a previous run left in tests/tmp, then recreate the directory empty
    shutil.rmtree(TMP, ignore_errors=True)
    TMP.mkdir(parents=True, exist_ok=True)
|
47 |
+
|
48 |
+
|
49 |
+
def pytest_terminal_summary(terminalreporter, exitstatus, config):
    """
    Delete files and directories left behind by the test run.

    Pytest invokes this hook at the end of the whole test session.

    Args:
        terminalreporter (pytest.terminal.TerminalReporter): The terminal reporter object (unused).
        exitstatus (int): The exit status of the test run (unused).
        config (pytest.config.Config): The pytest config object (unused).
    """
    from ultralytics.utils import WEIGHTS_DIR

    # Files: fixed names in the working directory plus exported models found under WEIGHTS_DIR
    stale_files = ["bus.jpg", "yolov8n.onnx", "yolov8n.torchscript"]
    for pattern in ("*.onnx", "*.torchscript"):
        stale_files.extend(WEIGHTS_DIR.rglob(pattern))
    for stale in stale_files:
        Path(stale).unlink(missing_ok=True)

    # Directories: the pytest cache two levels above TMP, TMP itself, and exported model folders
    stale_dirs = [TMP.parents[1] / ".pytest_cache", TMP]
    for pattern in ("*.mlpackage", "*_openvino_model"):
        stale_dirs.extend(WEIGHTS_DIR.rglob(pattern))
    for stale in stale_dirs:
        shutil.rmtree(stale, ignore_errors=True)
|
tests/test_cli.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
2 |
+
|
3 |
+
import subprocess
|
4 |
+
|
5 |
+
import pytest
|
6 |
+
|
7 |
+
from ultralytics.utils import ASSETS, WEIGHTS_DIR, checks
|
8 |
+
|
9 |
+
CUDA_IS_AVAILABLE = checks.cuda_is_available()
CUDA_DEVICE_COUNT = checks.cuda_device_count()

# One (task, model, data) triple per supported task
TASK_ARGS = [
    ("detect", "yolov8n", "coco8.yaml"),
    ("segment", "yolov8n-seg", "coco8-seg.yaml"),
    ("classify", "yolov8n-cls", "imagenet10"),
    ("pose", "yolov8n-pose", "coco8-pose.yaml"),
    ("obb", "yolov8n-obb", "dota8.yaml"),
]

# One (model, format) pair per task model, each exported to TorchScript
EXPORT_ARGS = [(model_name, "torchscript") for _, model_name, _ in TASK_ARGS]
|
25 |
+
|
26 |
+
|
27 |
+
def run(cmd):
    """Split a command string on whitespace and run it as a subprocess, raising on a non-zero exit code."""
    argv = cmd.split()  # plain whitespace split: arguments containing spaces are not supported
    subprocess.run(argv, check=True)
|
30 |
+
|
31 |
+
|
32 |
+
def test_special_modes():
    """Run each standalone `yolo` CLI mode once and require a zero exit code."""
    for mode in ("help", "checks", "version", "settings reset", "cfg"):
        run(f"yolo {mode}")
|
39 |
+
|
40 |
+
|
41 |
+
@pytest.mark.parametrize("task,model,data", TASK_ARGS)
def test_train(task, model, data):
    """Run a one-epoch CLI training job for the given task, model config, and dataset."""
    model_cfg = f"{model}.yaml"  # the command points at the model's .yaml, not a .pt checkpoint
    run(f"yolo train {task} model={model_cfg} data={data} imgsz=32 epochs=1 cache=disk")
|
45 |
+
|
46 |
+
|
47 |
+
@pytest.mark.parametrize("task,model,data", TASK_ARGS)
def test_val(task, model, data):
    """Run CLI validation for the given task using the model's .pt weights and dataset."""
    weights_stem = WEIGHTS_DIR / model
    run(f"yolo val {task} model={weights_stem}.pt data={data} imgsz=32 save_txt save_json")
|
51 |
+
|
52 |
+
|
53 |
+
@pytest.mark.parametrize("task,model,data", TASK_ARGS)
def test_predict(task, model, data):
    """Run CLI prediction on the bundled assets with the given model's .pt weights."""
    # Only `model` feeds the command; `task` and `data` come along with the shared parametrization
    weights_stem = WEIGHTS_DIR / model
    run(f"yolo predict model={weights_stem}.pt source={ASSETS} imgsz=32 save save_crop save_txt")
|
57 |
+
|
58 |
+
|
59 |
+
@pytest.mark.parametrize("model,format", EXPORT_ARGS)
def test_export(model, format):
    """Export the given model's .pt weights to the requested format through the CLI."""
    weights_stem = WEIGHTS_DIR / model
    run(f"yolo export model={weights_stem}.pt format={format} imgsz=32")
|
63 |
+
|
64 |
+
|
65 |
+
def test_rtdetr(task="detect", model="yolov8n-rtdetr.yaml", data="coco8.yaml"):
    """Train and then predict with an RT-DETR model config through the `yolo` CLI."""
    # NOTE(review): the previous comment said imgsz MUST be 640, yet both commands pass 160 - confirm
    # The train arguments are intentionally messy (leading dashes, stray spaces, a trailing comma)
    train_cmd = f"yolo train {task} model={model} data={data} --imgsz= 160 epochs =1, cache = disk"
    predict_cmd = f"yolo predict {task} model={model} source={ASSETS / 'bus.jpg'} imgsz=160 save save_crop save_txt"
    run(train_cmd)
    run(predict_cmd)
|
70 |
+
|
71 |
+
|
72 |
+
@pytest.mark.skipif(checks.IS_PYTHON_3_12, reason="MobileSAM with CLIP is not supported in Python 3.12")
def test_fastsam(task="segment", model=WEIGHTS_DIR / "FastSAM-s.pt", data="coco8-seg.yaml"):
    """Exercise FastSAM through the CLI (val, predict) and through the Python prompt API."""
    image = ASSETS / "bus.jpg"

    # CLI: validation followed by prediction
    run(f"yolo segment val {task} model={model} data={data} imgsz=32")
    run(f"yolo segment predict model={model} source={image} imgsz=32 save save_crop save_txt")

    from ultralytics import FastSAM
    from ultralytics.models.fastsam import FastSAMPrompt
    from ultralytics.models.sam import Predictor

    # Python API: run inference on the image on CPU
    fast_sam = FastSAM(model)  # or FastSAM-x.pt
    inference_kwargs = dict(device="cpu", retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
    everything_results = fast_sam(image, **inference_kwargs)

    # Remove small regions; the result is only unpacked, not inspected
    _masks, _ = Predictor.remove_small_regions(everything_results[0].masks.data, min_area=20)

    prompter = FastSAMPrompt(image, everything_results, device="cpu")

    # Everything prompt
    prompter.everything_prompt()

    # Box prompt, bbox given as [x1, y1, x2, y2]
    prompter.box_prompt(bbox=[200, 200, 300, 300])

    # Text prompt
    prompter.text_prompt(text="a photo of a dog")

    # Point prompt: points as [[x, y], ...], pointlabel 0 = background, 1 = foreground
    point_annotations = prompter.point_prompt(points=[[200, 200]], pointlabel=[1])
    prompter.plot(annotations=point_annotations, output="./")
|
108 |
+
|
109 |
+
|
110 |
+
def test_mobilesam():
    """Run MobileSAM predictions with a point prompt and with a box prompt."""
    from ultralytics import SAM

    mobile_sam = SAM(WEIGHTS_DIR / "mobile_sam.pt")
    image = ASSETS / "zidane.jpg"

    # Point prompt with a single label of 1
    mobile_sam.predict(image, points=[900, 370], labels=[1])

    # Box prompt
    mobile_sam.predict(image, bboxes=[439, 437, 524, 709])

    # Prompt-free inference (`mobile_sam(image)`) is not exercised here
|
128 |
+
|
129 |
+
|
130 |
+
# Slow Tests -----------------------------------------------------------------------------------------------------------
|
131 |
+
@pytest.mark.slow
@pytest.mark.parametrize("task,model,data", TASK_ARGS)
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
@pytest.mark.skipif(CUDA_DEVICE_COUNT < 2, reason="DDP is not available")
def test_train_gpu(task, model, data):
    """Run one-epoch CLI training on a single GPU and then on two GPUs."""
    shared_args = f"data={data} imgsz=32 epochs=1"
    run(f"yolo train {task} model={model}.yaml {shared_args} device=0")  # single GPU
    run(f"yolo train {task} model={model}.pt {shared_args} device=0,1")  # multi GPU
|
tests/test_cuda.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Ultralytics YOLO 🚀, AGPL-3.0 license
|
2 |
+
|
3 |
+
import pytest
|
4 |
+
import torch
|
5 |
+
|
6 |
+
from ultralytics import YOLO
|
7 |
+
from ultralytics.utils import ASSETS, WEIGHTS_DIR, checks
|
8 |
+
|
9 |
+
CUDA_IS_AVAILABLE = checks.cuda_is_available()
CUDA_DEVICE_COUNT = checks.cuda_device_count()

# Weights path deliberately routed through a folder whose name contains spaces
MODEL = WEIGHTS_DIR.joinpath("path with spaces", "yolov8n.pt")
DATA = "coco8.yaml"
BUS = ASSETS / "bus.jpg"
|
15 |
+
|
16 |
+
|
17 |
+
def test_checks():
    """Check that the cached CUDA availability and device count agree with torch.cuda."""
    assert CUDA_IS_AVAILABLE == torch.cuda.is_available()
    assert CUDA_DEVICE_COUNT == torch.cuda.device_count()
|
21 |
+
|
22 |
+
|
23 |
+
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
def test_train():
    """Train for one epoch on the small dataset, on device 0 or on devices 0 and 1."""
    if CUDA_DEVICE_COUNT == 1:
        device = 0
    else:
        device = [0, 1]
    yolo = YOLO(MODEL)
    yolo.train(data=DATA, imgsz=64, epochs=1, device=device)  # requires imgsz>=64
|
28 |
+
|
29 |
+
|
30 |
+
@pytest.mark.slow
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
def test_predict_multiple_devices():
    """Move a model between CPU and CUDA and check its device before and after each inference."""
    model = YOLO("yolov8n.pt")

    # (how to move the model, device string expected afterwards), applied in order
    moves = (
        (lambda m: m.cpu(), "cpu"),
        (lambda m: m.to("cuda:0"), "cuda:0"),
        (lambda m: m.cpu(), "cpu"),
        (lambda m: m.cuda(), "cuda:0"),
    )
    for move, expected in moves:
        model = move(model)
        assert str(model.device) == expected
        _ = model(BUS)  # inference must not change the device
        assert str(model.device) == expected
|
54 |
+
|
55 |
+
|
56 |
+
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
def test_autobatch():
    """Call the autobatch train batch-size check on a CUDA-resident YOLO model."""
    from ultralytics.utils.autobatch import check_train_batch_size

    cuda_net = YOLO(MODEL).model.cuda()
    check_train_batch_size(cuda_net, imgsz=128, amp=True)
|
62 |
+
|
63 |
+
|
64 |
+
@pytest.mark.slow
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
def test_utils_benchmarks():
    """Export a dynamic engine model and then profile the model with ProfileModels."""
    from ultralytics.utils.benchmarks import ProfileModels

    # Export a dynamic engine first so that dynamic inference can be used
    YOLO(MODEL).export(format="engine", imgsz=32, dynamic=True, batch=1)

    profiler = ProfileModels([MODEL], imgsz=32, half=False, min_time=1, num_timed_runs=3, num_warmup_runs=1)
    profiler.profile()
|
73 |
+
|
74 |
+
|
75 |
+
@pytest.mark.skipif(not CUDA_IS_AVAILABLE, reason="CUDA is not available")
def test_predict_sam():
    """Run SAM inference with no prompt, a box prompt, and a point prompt, then drive a SAMPredictor."""
    from ultralytics import SAM
    from ultralytics.models.sam import Predictor as SAMPredictor

    zidane = ASSETS / "zidane.jpg"

    sam = SAM(WEIGHTS_DIR / "sam_b.pt")
    sam.info()  # display model information (optional)

    # Inference on device 0: plain, with a bboxes prompt, with a points prompt
    sam(BUS, device=0)
    sam(BUS, bboxes=[439, 437, 524, 709], device=0)
    sam(zidane, points=[900, 370], labels=[1], device=0)

    # Standalone predictor configured through overrides
    overrides = {
        "conf": 0.25,
        "task": "segment",
        "mode": "predict",
        "imgsz": 1024,
        "model": WEIGHTS_DIR / "mobile_sam.pt",
    }
    predictor = SAMPredictor(overrides=overrides)

    # Set the image from a file path, then reset it; prompted predictor calls are not exercised
    predictor.set_image(zidane)
    predictor.reset_image()
|