diff --git a/docs/source/docs/command-reference/context_free/generate.md b/docs/source/docs/command-reference/context_free/generate.md deleted file mode 100644 index 68f37f5dec..0000000000 --- a/docs/source/docs/command-reference/context_free/generate.md +++ /dev/null @@ -1,35 +0,0 @@ -# Generate - -## Generate Datasets - -Creates a synthetic dataset with elements of the specified type and shape, -and saves it in the provided directory. - -Currently, can only generate fractal images, useful for network compression. -To create 3-channel images, you should provide the number of images, height and width. -The images are colorized with a model, which will be downloaded automatically. -Uses the algorithm from the [article](https://arxiv.org/abs/2103.13023). - -Usage: - -```console -datum generate [-h] -o OUTPUT_DIR -k COUNT --shape SHAPE [SHAPE ...] - [-t {image}] [--overwrite] [--model-dir MODEL_PATH] -``` - -Parameters: -- `-o, --output-dir` (string) - Output directory -- `-k, --count` (integer) - Number of images to be generated -- `--shape` (integer, repeatable) - Dimensions of data to be generated (H, W) -- `-t, --type` (one of: `image`) - Specify the type of data to generate (default: `image`) -- `--model-dir` (path) - Path to load the colorization model from. - If no model is found, the model will be downloaded (default: current dir) -- `--overwrite` - Allows overwriting existing files in the output directory, - when it is not empty. -- `-h, --help` - Print the help message and exit. - -Examples: -- Generate 300 3-channel fractal images with H=224, W=256 and store in the `images/` dir - ```console - datum generate -o images/ --count 300 --shape 224 256 - ``` diff --git a/src/datumaro/cli/commands/__init__.py b/src/datumaro/cli/commands/__init__.py index 2a58baf3a8..7432c900d9 100644 --- a/src/datumaro/cli/commands/__init__.py +++ b/src/datumaro/cli/commands/__init__.py @@ -10,7 +10,6 @@ detect_format, download, filter, - generate, info, merge, patch, @@ -36,7 +35,6 @@ def get_non_project_commands(): ("dinfo", info, "Print dataset info"), ("download", download, "Download a publicly available dataset"), ("filter", filter, "Filter dataset items"), - ("generate", generate, f"{deprecated} Generate synthetic dataset"), ("merge", merge, "Merge datasets"), ("patch", patch, "Update dataset from another one"), ("stats", stats, "Compute dataset statistics"), diff --git a/src/datumaro/cli/commands/generate.py b/src/datumaro/cli/commands/generate.py deleted file mode 100644 index 679202200e..0000000000 --- a/src/datumaro/cli/commands/generate.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright (C) 2022 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import argparse -import logging as log -import os -import os.path as osp -from shutil import rmtree - -from datumaro.cli.util.errors import CliException -from datumaro.util.definitions import get_datumaro_cache_dir - -from ..util import MultilineFormatter - -deprecated = "[DEPRECATED, will be removed in 1.12]" - - -def build_parser(parser_ctor=argparse.ArgumentParser): - parser = parser_ctor( - help=f"{deprecated} Generate synthetic dataset", - description=f""" - {deprecated} Creates a synthetic dataset with elements of the specified type and shape, - and saves it in the provided directory.|n - |n - Currently, can only generate fractal images, useful for network compression.|n - To create 3-channel images, you should provide the number of images, height and width.|n - The images are colorized with a model, which will be downloaded automatically.|n - Uses the algorithm from the article: https://arxiv.org/abs/2103.13023 |n - |n - Examples:|n - - Generate 300 3-channel images with H=224, W=256 and store to data_dir:|n - |s|s%(prog)s -o data_dir -k 300 --shape 224 256 - """, - formatter_class=MultilineFormatter, - ) - - parser.add_argument( - "-o", "--output-dir", required=True, help="Output directory to store generated dataset" - ) - parser.add_argument( - "-k", "--count", type=int, required=True, help="Number of images to be generated" - ) - parser.add_argument( - "--shape", - nargs=2, - metavar="DIM", - type=int, - required=True, - help="Dimensions of data to be generated (height, width)", - ) - parser.add_argument( - "-t", - "--type", - default="image", - choices=["image"], - help="Specify type of data to generate (default: %(default)s)", - ) - parser.add_argument( - "--model-dir", - type=str, - default=get_datumaro_cache_dir(), - help="Path to load the colorization model from. " - "If no model is found, the model will be downloaded (default: %(default)s)", - ) - parser.add_argument( - "--overwrite", action="store_true", help="Overwrite existing files in the save directory" - ) - - parser.set_defaults(command=generate_command) - - return parser - - -def get_sensitive_args(): - return {generate_command: ["output_dir", "model_dir"]} - - -def generate_command(args): - log.warning("This command is deprecated and will be removed in Datumaro 1.12") - from datumaro.plugins.synthetic_data import FractalImageGenerator - - log.info("Generating dataset...") - output_dir = args.output_dir - - if osp.isdir(output_dir) and os.listdir(output_dir): - if args.overwrite: - rmtree(output_dir) - os.mkdir(output_dir) - else: - raise CliException( - f"Directory '{output_dir}' already exists (pass --overwrite to overwrite)" - ) - - if args.type == "image": - FractalImageGenerator( - count=args.count, output_dir=output_dir, shape=args.shape, model_path=args.model_dir - ).generate_dataset() - else: - raise NotImplementedError(f"Data type: {args.type} is not supported") - - log.info(f"Results have been saved to '{output_dir}'") - - return 0 diff --git a/src/datumaro/plugins/synthetic_data/__init__.py b/src/datumaro/plugins/synthetic_data/__init__.py deleted file mode 100644 index c43dc7bab7..0000000000 --- a/src/datumaro/plugins/synthetic_data/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -# Copyright (C) 2022 Intel Corporation -# -# SPDX-License-Identifier: MIT - -from datumaro.plugins.synthetic_data.image_generator import FractalImageGenerator diff --git a/src/datumaro/plugins/synthetic_data/background_colors.txt b/src/datumaro/plugins/synthetic_data/background_colors.txt deleted file mode 100644 index 74fbf91dd0..0000000000 --- a/src/datumaro/plugins/synthetic_data/background_colors.txt +++ /dev/null @@ -1,1000 +0,0 @@ -89 108 102 -79 106 114 -138 108 57 -136 104 43 -119 92 48 -112 118 101 -136 135 77 -90 111 126 -83 105 116 -108 129 131 -113 129 128 -88 117 112 -112 125 127 -107 121 125 -102 133 116 -107 128 128 -103 127 121 -89 107 103 -107 124 116 -100 116 115 -104 116 117 -153 148 139 -131 120 108 -120 124 118 -107 112 110 -76 100 107 -93 119 126 -84 107 117 -74 94 106 -75 97 107 -81 105 103 -66 105 102 -81 109 116 -121 113 89 -118 114 113 -92 111 120 -90 109 117 -92 117 123 -87 102 124 -77 101 99 -78 120 114 -99 120 136 -93 109 122 -77 102 112 -103 117 128 -84 105 120 -81 121 114 -75 90 93 -91 105 110 -95 111 115 -96 106 106 -83 96 104 -100 115 131 -99 111 125 -92 113 129 -64 102 93 -100 115 125 -87 109 122 -105 122 126 -58 87 84 -82 101 116 -83 103 116 -86 103 112 -86 105 118 -60 91 87 -111 122 100 -90 117 140 -97 111 121 -94 112 128 -102 118 131 -95 131 122 -123 134 146 -94 130 120 -88 111 117 -93 114 105 -123 132 139 -98 113 127 -109 129 137 -125 147 157 -95 117 134 -113 129 136 -111 117 120 -105 121 121 -106 129 142 -97 122 111 -106 129 134 -104 125 131 -99 108 114 -83 105 110 -97 109 108 -94 116 122 -95 127 124 -104 130 126 -97 112 114 -94 120 120 -94 129 125 -85 103 97 -106 119 117 -133 128 118 -110 127 127 -109 120 121 -85 108 108 -104 120 130 -81 97 100 -102 120 125 -104 113 117 -96 109 113 -131 73 51 -89 99 107 -107 123 81 -78 97 113 -56 73 71 -94 103 115 -81 104 112 -98 116 125 -75 102 110 -97 110 123 -83 84 84 -88 109 129 -105 117 124 -102 116 126 -83 96 119 -87 101 135 -90 101 101 -86 104 120 -114 124 137 -91 101 113 -125 129 115 -112 126 121 -108 116 114 -76 93 110 -98 116 108 -90 99 95 -88 115 115 -101 120 118 -93 117 113 -93 121 117 -120 127 122 -92 124 135 -124 132 135 -127 130 129 -121 130 125 -126 129 127 -138 139 132 -122 122 115 -112 121 118 -116 111 98 -133 122 105 -131 111 79 -125 127 53 -113 117 107 -99 111 126 -106 124 120 -108 118 126 -96 112 125 -97 108 115 -93 111 118 -95 113 121 -96 116 120 -99 119 128 -104 123 126 -94 110 120 -98 113 120 -105 120 130 -102 117 124 -95 110 114 -102 113 116 -99 115 121 -109 124 139 -99 110 112 -105 118 119 -95 104 112 -96 118 124 -103 116 123 -103 116 116 -94 109 115 -103 120 122 -102 123 118 -101 113 120 -96 124 120 -97 110 119 -113 122 123 -109 124 129 -110 126 127 -104 124 135 -105 127 133 -87 103 113 -97 113 125 -94 127 127 -94 113 119 -114 121 122 -103 122 126 -106 122 126 -96 116 122 -104 119 121 -96 106 117 -112 121 126 -109 129 128 -99 121 124 -99 110 116 -93 113 119 -89 107 122 -83 108 120 -82 92 101 -103 115 123 -105 121 119 -107 127 129 -99 119 129 -97 113 124 -108 125 133 -106 125 128 -91 117 127 -92 105 108 -94 113 130 -101 118 118 -94 106 113 -97 115 126 -89 104 107 -100 119 120 -93 111 123 -97 115 119 -107 128 128 -110 119 120 -111 122 126 -100 113 113 -100 120 121 -101 122 124 -107 123 127 -101 119 122 -102 115 119 -91 102 111 -97 112 115 -96 109 109 -114 130 131 -110 121 127 -109 126 128 -102 117 118 -94 106 117 -107 116 116 -104 115 116 -92 112 110 -99 121 118 -93 110 122 -96 116 126 -103 112 115 -101 112 118 -100 112 118 -112 121 128 -111 120 121 -97 110 111 -103 111 113 -93 101 110 -97 113 111 -88 108 121 -102 112 122 -90 112 120 -105 115 113 -98 108 115 -111 122 122 -98 118 130 -94 111 123 -110 118 118 -103 121 132 -101 115 119 -99 118 124 -92 111 127 -94 107 117 -111 124 124 -107 120 123 -93 107 112 -102 111 111 -94 118 128 -111 125 132 -86 105 116 -81 108 115 -95 120 122 -104 128 133 -92 112 120 -87 115 120 -104 112 113 -95 111 119 -105 117 126 -96 109 119 -111 115 121 -117 124 132 -87 99 110 -94 110 121 -93 109 114 -87 105 114 -114 125 124 -79 100 105 -84 107 121 -87 109 123 -96 122 134 -98 114 117 -92 114 110 -148 145 141 -92 104 105 -97 119 125 -97 115 126 -102 128 135 -84 121 125 -134 151 157 -102 137 132 -66 125 111 -103 124 140 -112 129 137 -86 126 127 -95 131 134 -86 122 136 -97 122 134 -91 122 120 -103 129 130 -89 110 113 -111 131 150 -75 102 107 -100 120 125 -72 126 118 -86 119 116 -81 125 118 -62 118 105 -81 112 113 -64 124 108 -86 122 119 -72 105 102 -68 115 120 -76 115 107 -99 104 109 -103 112 116 -95 108 111 -97 124 123 -92 125 122 -90 98 110 -108 122 148 -99 113 116 -95 115 115 -101 119 119 -99 113 117 -101 112 122 -90 116 134 -96 113 118 -90 105 116 -112 127 129 -93 115 122 -106 115 114 -103 122 125 -103 125 124 -93 110 114 -94 112 119 -103 119 125 -111 123 128 -115 140 149 -94 120 126 -93 118 124 -107 124 132 -105 120 124 -96 113 117 -100 111 116 -98 111 119 -97 104 109 -101 110 107 -82 100 106 -90 108 111 -83 102 109 -84 113 107 -86 107 114 -85 104 102 -98 112 111 -94 114 114 -90 110 105 -87 105 105 -88 115 118 -105 124 131 -92 109 112 -89 107 111 -91 108 107 -87 114 113 -91 110 116 -82 104 97 -90 117 104 -77 108 108 -90 112 107 -89 114 114 -92 114 119 -97 115 108 -100 118 122 -103 121 126 -81 99 105 -80 94 90 -114 115 105 -88 103 99 -107 111 107 -82 99 84 -87 98 94 -113 118 106 -91 104 94 -77 81 78 -101 107 91 -118 129 144 -101 106 114 -86 97 104 -68 74 83 -65 73 93 -142 133 122 -146 141 131 -147 134 121 -70 84 102 -96 105 115 -130 137 136 -129 136 141 -97 115 117 -109 116 135 -92 102 110 -108 118 124 -109 114 120 -88 104 126 -89 96 103 -129 121 115 -111 124 138 -123 131 146 -78 89 102 -97 111 122 -125 130 136 -94 105 115 -107 114 121 -108 117 120 -105 115 125 -103 116 127 -102 116 123 -98 114 120 -97 109 122 -119 125 133 -89 101 118 -121 120 111 -118 126 140 -117 126 137 -98 104 110 -117 109 109 -104 114 121 -100 106 124 -83 96 106 -78 94 120 -134 129 124 -120 124 132 -110 117 119 -118 126 128 -145 147 152 -114 118 122 -96 111 114 -106 110 105 -135 132 135 -110 112 124 -115 127 145 -94 105 122 -86 98 111 -99 116 131 -106 122 128 -105 112 120 -112 120 124 -119 125 140 -135 130 122 -88 99 107 -95 110 126 -118 125 131 -118 123 129 -117 122 126 -101 104 105 -80 92 126 -95 108 112 -82 96 104 -55 68 95 -114 122 121 -130 140 135 -155 158 159 -114 123 134 -99 102 96 -82 96 114 -133 134 141 -93 112 132 -94 97 103 -95 99 102 -104 112 120 -128 129 132 -112 115 114 -152 142 128 -96 103 109 -72 86 109 -126 129 134 -105 113 119 -102 113 114 -107 115 125 -116 130 136 -93 110 130 -99 122 146 -100 108 114 -88 104 123 -105 112 141 -96 100 103 -93 94 102 -94 102 118 -93 110 132 -89 99 108 -117 125 139 -116 121 128 -96 111 126 -97 109 122 -92 105 117 -106 112 116 -90 95 100 -87 103 122 -139 132 125 -112 115 118 -158 163 174 -78 84 90 -100 110 122 -107 116 126 -105 113 119 -137 131 124 -132 134 140 -98 111 124 -105 119 134 -89 102 118 -86 122 117 -115 121 129 -83 97 111 -116 120 115 -96 105 116 -105 112 117 -97 113 127 -98 112 126 -98 98 106 -110 113 123 -97 115 139 -119 124 133 -110 121 132 -93 100 104 -130 125 115 -142 138 137 -101 108 113 -97 108 118 -145 132 123 -83 88 99 -98 109 122 -95 107 114 -82 95 108 -126 124 126 -66 74 91 -117 121 124 -100 112 131 -129 140 153 -145 146 148 -153 159 173 -80 78 106 -96 113 123 -142 134 129 -97 101 114 -106 119 134 -150 136 124 -81 92 107 -98 112 122 -89 96 100 -106 117 125 -84 91 94 -123 132 139 -101 115 133 -108 110 113 -68 77 89 -88 102 117 -95 103 115 -116 118 110 -102 105 109 -104 109 116 -98 109 121 -97 105 107 -99 118 115 -101 111 112 -126 125 117 -109 123 134 -116 126 138 -85 95 106 -104 116 115 -93 97 97 -75 96 111 -91 98 110 -131 140 150 -101 109 124 -120 129 128 -100 112 122 -112 127 146 -107 117 131 -129 135 139 -128 134 140 -97 104 110 -84 92 104 -78 91 108 -111 123 130 -102 114 118 -91 102 123 -81 90 104 -84 114 134 -120 128 130 -90 99 109 -96 103 114 -109 120 121 -115 123 133 -121 126 133 -150 152 154 -19 30 61 -113 117 123 -104 110 111 -119 119 131 -92 107 126 -113 115 119 -120 127 132 -95 102 119 -145 144 148 -100 109 120 -149 156 161 -86 101 116 -67 88 110 -124 126 132 -98 117 115 -92 98 108 -115 124 135 -90 103 117 -128 127 129 -71 80 94 -104 105 104 -144 136 126 -117 122 143 -104 110 121 -136 144 152 -117 124 131 -103 114 123 -88 100 113 -124 137 141 -131 133 145 -99 110 125 -134 140 150 -128 136 143 -120 125 130 -118 132 152 -93 106 123 -83 92 110 -68 78 99 -111 118 113 -97 118 110 -113 128 141 -105 124 139 -121 129 126 -81 84 97 -105 113 123 -98 107 110 -100 110 115 -111 111 113 -91 103 117 -111 114 116 -133 130 127 -113 119 134 -107 123 141 -113 120 118 -99 111 113 -96 103 109 -113 116 115 -83 93 99 -114 124 124 -112 122 135 -134 134 138 -126 122 117 -129 139 145 -106 111 112 -95 109 114 -108 112 113 -100 111 121 -95 108 120 -111 115 111 -98 110 116 -95 110 120 -86 96 108 -144 152 157 -154 162 167 -102 109 115 -138 128 118 -102 105 118 -105 119 134 -96 94 93 -92 99 106 -82 94 109 -122 127 124 -105 111 121 -107 121 124 -110 115 116 -115 127 142 -115 118 119 -101 102 127 -119 128 134 -107 117 131 -101 105 117 -117 122 121 -115 131 146 -123 133 140 -172 144 123 -91 95 109 -94 112 112 -109 112 114 -104 108 108 -107 113 112 -107 115 120 -138 133 122 -126 128 141 -115 128 141 -171 176 182 -113 122 131 -124 131 133 -109 115 134 -106 111 114 -113 121 120 -93 104 109 -122 120 115 -98 107 124 -90 109 130 -131 146 157 -96 97 105 -108 117 127 -136 124 114 -89 102 110 -116 134 152 -119 116 109 -100 105 120 -106 121 135 -104 119 120 -105 117 128 -113 121 127 -115 116 115 -107 113 113 -113 116 132 -68 82 97 -99 112 124 -93 114 115 -88 105 120 -100 108 120 -84 95 124 -137 140 145 -88 94 98 -116 118 118 -115 118 121 -140 146 156 -81 87 97 -130 136 150 -99 111 122 -105 115 126 -90 100 110 -120 121 121 -104 112 118 -97 106 115 -134 128 121 -107 115 114 -114 117 112 -113 119 116 -92 100 112 -99 108 118 -97 103 114 -74 96 123 -124 128 138 -97 108 119 -102 116 127 -75 89 103 -105 115 129 -90 109 110 -115 128 141 -113 119 128 -135 139 141 -126 131 137 -122 128 136 -117 126 131 -104 107 117 -74 81 92 -105 114 131 -108 113 121 -101 115 118 -154 146 137 -102 101 99 -101 105 108 -158 156 169 -99 114 129 -83 85 93 -97 108 118 -99 115 132 -92 101 115 -93 104 117 -105 118 130 -107 110 114 -115 119 126 -91 104 119 -114 126 138 -152 143 138 -112 113 121 -127 124 134 -106 116 124 -102 113 125 -61 68 86 -125 120 80 -149 142 136 -134 133 133 -109 115 122 -121 134 136 -99 110 124 -133 126 120 -93 100 113 -93 116 136 -103 111 118 -89 99 112 -101 99 96 -109 123 133 -139 131 121 -84 98 96 -140 146 156 -101 112 118 -71 88 102 -57 65 81 -116 120 120 -127 122 113 -91 99 106 -98 104 115 -115 120 133 -100 113 119 -106 111 121 -87 92 100 -101 110 120 -118 120 123 -101 113 115 -111 118 136 -114 113 111 -134 127 114 -95 103 112 -107 122 126 -105 107 119 -101 107 118 -109 122 134 -112 111 107 -117 123 134 -116 119 128 -123 130 126 -99 109 110 -101 115 127 -98 102 110 -85 102 126 -100 111 114 -104 115 123 -114 122 133 -107 119 137 -85 94 102 -81 112 109 -107 121 126 -44 46 94 -97 110 123 -121 132 134 -76 92 115 -98 108 121 -118 127 140 -89 99 110 -101 119 134 -59 70 94 -113 119 112 -103 108 108 -93 106 123 -103 115 119 -112 112 104 -92 112 128 -110 116 122 -105 111 118 -152 140 128 -108 119 123 -115 118 118 -122 126 127 -75 80 89 -116 124 132 -86 97 107 -78 88 96 -99 105 110 -104 114 115 -76 84 97 -97 106 116 -107 122 128 -77 91 107 -102 108 132 -99 102 106 -84 97 110 -107 116 115 -74 88 110 -106 114 124 -96 110 123 -119 131 145 -113 120 138 -95 111 133 -156 147 138 -134 141 145 -112 120 127 -105 110 111 -106 114 123 -140 129 116 -109 119 128 -139 143 142 -126 136 155 -108 117 116 -101 114 125 -109 115 126 -76 91 107 -136 132 121 -74 90 107 -87 108 131 -111 118 132 -105 121 120 -127 124 115 -146 141 136 -102 108 108 -167 170 171 -103 118 133 -110 120 131 -124 123 118 -121 119 114 -94 107 124 -113 122 126 -84 109 130 -74 109 126 -88 110 133 -74 97 123 -88 104 125 -97 110 131 -102 113 128 -81 110 134 -88 115 143 -90 108 132 -86 117 143 -93 116 139 -90 121 143 -89 116 102 -88 120 109 -105 139 144 -82 116 115 -56 113 140 -81 114 130 -64 107 146 -86 117 118 -84 107 111 -72 118 125 -99 112 110 -69 93 106 -101 146 144 -86 109 151 -50 103 150 -75 121 138 -90 119 133 -76 103 112 -66 116 133 -69 109 107 -103 135 135 -67 86 126 -101 122 125 -97 128 149 -91 105 124 -95 114 133 -82 106 137 -78 106 138 -83 113 136 -93 113 131 -87 100 118 -93 107 123 -108 120 134 -81 100 119 -145 137 121 -93 101 106 -117 125 127 -121 116 76 -140 136 129 -126 124 115 -145 134 116 -148 141 131 -119 121 120 -99 107 94 -123 115 109 -98 117 122 -88 103 115 -135 114 54 -87 140 131 -76 101 109 -43 99 84 -73 115 139 -82 110 112 -83 104 115 -75 103 120 -65 86 97 -66 88 107 -77 97 115 -73 94 104 -66 83 93 -85 103 111 -93 116 124 -73 118 131 -102 115 126 diff --git a/src/datumaro/plugins/synthetic_data/image_generator.py b/src/datumaro/plugins/synthetic_data/image_generator.py deleted file mode 100644 index 2e643de92c..0000000000 --- a/src/datumaro/plugins/synthetic_data/image_generator.py +++ /dev/null @@ -1,301 +0,0 @@ -# Copyright (C) 2022 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import hashlib -import logging as log -import os -import os.path as osp -from importlib.resources import open_text -from multiprocessing import get_context -from random import Random -from typing import List, Optional, Tuple - -import cv2 as cv -import numpy as np -import requests - -from datumaro.components.generator import DatasetGenerator -from datumaro.util.definitions import get_datumaro_cache_dir -from datumaro.util.deprecation import deprecated -from datumaro.util.image import save_image -from datumaro.util.scope import on_error_do, on_exit_do, scope_add, scoped - -from .utils import IFSFunction, augment, colorize, suppress_computation_warnings - - -@deprecated(deprecated_version="1.11", removed_version="1.12") -class FractalImageGenerator(DatasetGenerator): - """ - ImageGenerator generates 3-channel synthetic images with provided shape. - Uses the algorithm from the article: https://arxiv.org/abs/2103.13023 - """ - - _MODEL_PROTO_FILENAME = "colorization_deploy_v2.prototxt" - _MODEL_WEIGHTS_FILENAME = "colorization_release_v2.caffemodel" - _HULL_PTS_FILE_NAME = "pts_in_hull.npy" - _COLORS_FILE = "background_colors.txt" - - def __init__( - self, - output_dir: str, - count: int, - shape: Tuple[int, int], - model_path: str = get_datumaro_cache_dir(), - ) -> None: - assert 0 < count, "Image count cannot be lesser than 1" - self._count = count - - self._output_dir = output_dir - self._model_dir = model_path - - self._cpu_count = min(os.cpu_count(), self._count) - - assert len(shape) == 2 - self._height, self._width = shape - - self._weights = self._create_weights(IFSFunction.NUM_PARAMS) - self._threshold = 0.2 - self._iterations = 200000 - self._num_of_points = 100000 - - self._initialize_params() - - def generate_dataset(self) -> None: - log.info( - "Generation of '%d' 3-channel images with height = '%d' and width = '%d'", - self._count, - self._height, - self._width, - ) - - self._download_colorization_model(self._model_dir) - - mp_ctx = get_context("spawn") # On Mac 10.15 and Python 3.7 fork leads to hangs - with mp_ctx.Pool(processes=self._cpu_count) as pool: - try: - params = pool.map( - self._generate_category, - [Random(i) for i in range(self._categories)], # nosec B311 - ) - finally: - pool.close() - pool.join() - - instances_weights = np.repeat(self._weights, self._instances, axis=0) - weight_per_img = np.tile(instances_weights, (self._categories, 1)) - params = np.array(params, dtype=object) - repeated_params = np.repeat(params, self._weights.shape[0] * self._instances, axis=0) - repeated_params = repeated_params[: self._count] - weight_per_img = weight_per_img[: self._count] - assert weight_per_img.shape[0] == len(repeated_params) == self._count - - splits = min(self._cpu_count, self._count) - params_per_proc = np.array_split(repeated_params, splits) - weights_per_proc = np.array_split(weight_per_img, splits) - - generation_params = [] - offset = 0 - for param, w in zip(params_per_proc, weights_per_proc): - indices = list(range(offset, offset + len(param))) - offset += len(param) - generation_params.append((param, w, indices)) - - with mp_ctx.Pool(processes=self._cpu_count) as pool: - try: - pool.starmap(self._generate_image_batch, generation_params) - finally: - pool.close() - pool.join() - - @scoped - def _generate_image_batch( - self, params: np.ndarray, weights: np.ndarray, indices: List[int] - ) -> None: - scope_add(suppress_computation_warnings()) - - proto = osp.join(self._model_dir, self._MODEL_PROTO_FILENAME) - model = osp.join(self._model_dir, self._MODEL_WEIGHTS_FILENAME) - npy = osp.join(self._model_dir, self._HULL_PTS_FILE_NAME) - pts_in_hull = np.load(npy).transpose().reshape(2, 313, 1, 1).astype(np.float32) - - with open_text(__package__, self._COLORS_FILE) as f: - background_colors = np.loadtxt(f) - - net = cv.dnn.readNetFromCaffe(proto, model) - net.getLayer(net.getLayerId("class8_ab")).blobs = [pts_in_hull] - net.getLayer(net.getLayerId("conv8_313_rh")).blobs = [np.full([1, 313], 2.606, np.float32)] - - for i, param, w in zip(indices, params, weights): - image = self._generate_image( - Random(i), # nosec B311 - param, - self._iterations, - self._height, - self._width, - draw_point=False, - weight=w, - ) - color_image = colorize(image, net) - aug_image = augment(Random(i), color_image, background_colors) # nosec B311 - save_image( - osp.join(self._output_dir, "{:06d}.png".format(i)), aug_image, create_dir=True - ) - - def _generate_image( - self, - rng: Random, - params: np.ndarray, - iterations: int, - height: int, - width: int, - draw_point: bool = True, - weight: Optional[np.ndarray] = None, - ) -> np.ndarray: - ifs_function = IFSFunction(rng, prev_x=0.0, prev_y=0.0) - for param in params: - ifs_function.add_param( - param[: ifs_function.NUM_PARAMS], param[ifs_function.NUM_PARAMS], weight - ) - ifs_function.calculate(iterations) - img = ifs_function.draw(height, width, draw_point) - return img - - @scoped - def _generate_category(self, rng: Random, base_h: int = 512, base_w: int = 512) -> np.ndarray: - scope_add(suppress_computation_warnings()) - - pixels = -1 - i = 0 - while pixels < self._threshold and i < self._iterations: - param_size = rng.randint(2, 7) - params = np.zeros((param_size, IFSFunction.NUM_PARAMS + 1), dtype=np.float32) - - sum_proba = 1e-5 - for p_idx in range(param_size): - a, b, c, d, e, f = [rng.uniform(-1.0, 1.0) for _ in range(IFSFunction.NUM_PARAMS)] - prob = abs(a * d - b * c) - sum_proba += prob - params[p_idx] = a, b, c, d, e, f, prob - params[:, IFSFunction.NUM_PARAMS] /= sum_proba - - fractal_img = self._generate_image(rng, params, self._num_of_points, base_h, base_w) - pixels = np.count_nonzero(fractal_img) / (base_h * base_w) - i += 1 - return params - - def _initialize_params(self) -> None: - if self._count < self._weights.shape[0]: - self._weights = self._weights[: self._count, :] - - instances_categories = np.ceil(self._count / self._weights.shape[0]) - self._instances = np.ceil(np.sqrt(instances_categories)).astype(int) - self._categories = np.ceil(instances_categories / self._instances).astype(int) - - @staticmethod - def _create_weights(num_params): - # weights from https://openaccess.thecvf.com/content/ACCV2020/papers/Kataoka_Pre-training_without_Natural_Images_ACCV_2020_paper.pdf - BASE_WEIGHTS = np.ones((num_params,)) - WEIGHT_INTERVAL = 0.4 - INTERVAL_MULTIPLIERS = (-2, -1, 1, 2) - weight_vectors = [BASE_WEIGHTS] - - for weight_index in range(num_params): - for multiplier in INTERVAL_MULTIPLIERS: - modified_weights = BASE_WEIGHTS.copy() - modified_weights[weight_index] += multiplier * WEIGHT_INTERVAL - weight_vectors.append(modified_weights) - weights = np.array(weight_vectors) - return weights - - @classmethod - def _download_colorization_model(cls, save_dir: str) -> None: - prototxt_file_name = cls._MODEL_PROTO_FILENAME - caffemodel_file_name = cls._MODEL_WEIGHTS_FILENAME - hull_file_name = cls._HULL_PTS_FILE_NAME - - proto_path = osp.join(save_dir, prototxt_file_name) - model_path = osp.join(save_dir, caffemodel_file_name) - hull_path = osp.join(save_dir, hull_file_name) - if not ( - osp.exists(proto_path) and osp.exists(model_path) and osp.exists(hull_path) - ) and not os.access(save_dir, os.W_OK): - raise ValueError( - "Please provide a path to a colorization model directory or " - "a path to a writable directory to download the model" - ) - - for url, filename, size, sha512_checksum in [ - ( - f"https://raw.githubusercontent.com/richzhang/colorization/a1642d6ac6fc80fe08885edba34c166da09465f6/colorization/models/{prototxt_file_name}", - prototxt_file_name, - 9945, - "e3dd9188771202bd296623510bcf527b41c130fc9bae584e61dcdf66917b8c4d147b7b838fec0685568f7f287235c34e8b8e9c0482b555774795be89f0442820", - ), - ( - f"http://eecs.berkeley.edu/~rich.zhang/projects/2016_colorization/files/demo_v2/{caffemodel_file_name}", - caffemodel_file_name, - 128946764, - "3d773dd83cfcf8e846e3a9722a4d302a3b7a0f95a0a7ae1a3d3ef5fe62eecd617f4f30eefb1d8d6123be4a8f29f7c6e64f07b36193f45710b549f3e4796570f1", - ), - ( - f"https://raw.githubusercontent.com/richzhang/colorization/a1642d6ac6fc80fe08885edba34c166da09465f6/colorization/resources/{hull_file_name}", - hull_file_name, - 5088, - "bf59a8a4e74b18948e4aeaa430f71eb8603bd9dbbce207ea086dd0fb976a34672beaeea6f1233a21687da710e0f8d36e86133a8532265dfda52994a7d6f0dbf5", - ), - ]: - save_path = osp.join(save_dir, filename) - if osp.exists(save_path): - continue - - log.info("Downloading the '%s' file to '%s'", filename, save_dir) - try: - cls._download_file( - url, save_path, expected_size=size, expected_checksum=sha512_checksum - ) - except Exception as e: - raise Exception(f"Failed to download the '{filename}' file: {str(e)}") from e - - @staticmethod - @scoped - def _download_file( - url: str, output_path: str, *, timeout: int = 60, expected_size: int, expected_checksum: str - ) -> None: - BLOCK_SIZE = 2**20 - - assert not osp.exists(output_path) - - tmp_path = output_path + ".tmp" - if osp.exists(tmp_path): - raise Exception(f"Can't write temporary file '{tmp_path}' - file exists") - - response = requests.get(url, timeout=timeout, stream=True) - on_exit_do(response.close) - - response.raise_for_status() - - checksum_counter = hashlib.sha512() - actual_size = 0 - - with open(tmp_path, "wb") as fd: - on_error_do(os.unlink, tmp_path) - - for chunk in response.iter_content(chunk_size=BLOCK_SIZE): - actual_size += len(chunk) - if actual_size > expected_size: - # There is also the context-length header, but it can be corrupted or invalid - # for different reasons - raise Exception( - f"The downloaded file has unexpected size, expected {expected_size}." - ) - - checksum_counter.update(chunk) - - fd.write(chunk) - - actual_checksum = checksum_counter.hexdigest() - if actual_checksum.lower() != expected_checksum.lower(): - raise Exception("The downloaded file has unexpected checksum") - - os.rename(tmp_path, output_path) diff --git a/src/datumaro/plugins/synthetic_data/utils.py b/src/datumaro/plugins/synthetic_data/utils.py deleted file mode 100644 index 6fa72eaca9..0000000000 --- a/src/datumaro/plugins/synthetic_data/utils.py +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (C) 2022 Intel Corporation -# -# SPDX-License-Identifier: MIT - -import warnings -from contextlib import contextmanager -from random import Random -from typing import ContextManager - -import cv2 as cv -import numpy as np - - -class IFSFunction: - NUM_PARAMS = 6 - - def __init__(self, rng, prev_x, prev_y): - self.function = [] - self.xs, self.ys = [prev_x], [prev_y] - self.select_function = [] - self.cum_proba = 0.0 - self._rng = rng - - def add_param(self, params, proba, weights=None): - if weights is not None: - params = list(np.array(params) * np.array(weights)) - - self.function.append(params) - self.cum_proba += proba - self.select_function.append(self.cum_proba) - - def calculate(self, iterations): - prev_x, prev_y = self.xs[-1], self.ys[-1] - next_x, next_y = self.xs[-1], self.ys[-1] - - for _ in range(iterations): - rand = self._rng.random() - for func_params, select_func in zip(self.function, self.select_function): - a, b, c, d, e, f = func_params - if rand <= select_func: - next_x = prev_x * a + prev_y * b + e - next_y = prev_x * c + prev_y * d + f - break - - self.xs.append(next_x) - self.ys.append(next_y) - prev_x = next_x - prev_y = next_y - - @staticmethod - def process_nans(data): - nan_index = np.nonzero(np.isnan(data)) - extend = np.array(range(nan_index[0][0] - 100, nan_index[0][0])) - delete_row = np.append(extend, nan_index) - return delete_row - - def rescale(self, image_x, image_y, pad_x, pad_y): - if image_x < 2 * pad_x or image_y < 2 * pad_y: - raise ValueError( - f"Image generation with height < {2 * pad_x} or " - f"width < {2 * pad_y} is not supported" - ) - - xs = np.array(self.xs) - ys = np.array(self.ys) - if np.any(np.isnan(xs)): - delete_row = self.process_nans(xs) - xs = np.delete(xs, delete_row, axis=0) - ys = np.delete(ys, delete_row, axis=0) - - if np.any(np.isnan(ys)): - delete_row = self.process_nans(ys) - xs = np.delete(xs, delete_row, axis=0) - ys = np.delete(ys, delete_row, axis=0) - - if np.min(xs) < 0.0: - xs -= np.min(xs) - if np.min(ys) < 0.0: - ys -= np.min(ys) - xmax, xmin = np.max(xs), np.min(xs) - ymax, ymin = np.max(ys), np.min(ys) - self.xs = np.uint16(xs / (xmax - xmin + 1e-5) * (image_x - 2 * pad_x) + pad_x) - self.ys = np.uint16(ys / (ymax - ymin + 1e-5) * (image_y - 2 * pad_y) + pad_y) - - def draw(self, image_x, image_y, draw_point, pad_x=6, pad_y=6): - self.rescale(image_x, image_y, pad_x, pad_y) - image = np.zeros((image_x, image_y), dtype=np.uint8) - for x, y in zip(self.xs, self.ys): - if draw_point: - image[x, y] = 127 - else: - mask = "{:09b}".format(self._rng.randint(1, 511)) - patch = 127 * np.array(list(map(int, list(mask))), dtype=np.uint8).reshape(3, 3) - image[x + 1 : x + 4, y + 1 : y + 4] = patch - - return image - - -def rgb2lab(frame: np.ndarray) -> np.ndarray: - # Use of the OpenCV version sometimes leads to hangs - y_coeffs = np.array([0.212671, 0.715160, 0.072169], dtype=np.float32) - frame = np.where(frame > 0.04045, np.power((frame + 0.055) / 1.055, 2.4), frame / 12.92) - y = frame @ y_coeffs.T - L = np.where(y > 0.008856, 116 * np.cbrt(y) - 16, 903.3 * y) - return L - - -def colorize(frame, net): - H_orig, W_orig = frame.shape[:2] - if len(frame.shape) == 2 or frame.shape[-1] == 1: - frame = np.tile(frame.reshape(H_orig, W_orig, 1), (1, 1, 3)) - - frame = frame.astype(np.float32) / 255 - img_l = rgb2lab(frame) # get L from Lab image - img_rs = cv.resize(img_l, (224, 224)) # resize image to network input size - img_l_rs = img_rs - 50 # subtract 50 for mean-centering - - net.setInput(cv.dnn.blobFromImage(img_l_rs)) - ab_dec = net.forward()[0, :, :, :].transpose((1, 2, 0)) - - ab_dec_us = cv.resize(ab_dec, (W_orig, H_orig)) - img_lab_out = np.concatenate( - (img_l[..., np.newaxis], ab_dec_us), axis=2 - ) # concatenate with original image L - img_bgr_out = np.clip(cv.cvtColor(img_lab_out, cv.COLOR_Lab2BGR), 0, 1) - frame_normed = 255 * (img_bgr_out - img_bgr_out.min()) / (img_bgr_out.max() - img_bgr_out.min()) - frame_normed = np.array(frame_normed, dtype=np.uint8) - return cv.resize(frame_normed, (W_orig, H_orig)) - - -def augment(rng: Random, image: np.ndarray, colors: np.ndarray) -> np.ndarray: - if rng.random() >= 0.5: - image = cv.flip(image, 1) - - if rng.random() >= 0.5: - image = cv.flip(image, 0) - - height, width = image.shape[:2] - angle = rng.uniform(-30, 30) - rotate_matrix = cv.getRotationMatrix2D(center=(width / 2, height / 2), angle=angle, scale=1) - image = cv.warpAffine(src=image, M=rotate_matrix, dsize=(width, height)) - - image = fill_background(rng, image, colors) - if rng.random() >= 0.3: - k_size = rng.choice(list(range(3, 16, 2))) - image = cv.GaussianBlur(image, (k_size, k_size), 0) - return image - - -def fill_background(rng: Random, image: np.ndarray, colors: np.ndarray) -> np.ndarray: - rows, cols = np.nonzero(~np.any(image, axis=-1)) # background color = [0, 0, 0] - image[rows, cols] = rng.choice(colors) - return image - - -@contextmanager -def suppress_computation_warnings() -> ContextManager: - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - message=r"(invalid value|overflow) encountered", - module=__package__, - append=True, - ) - yield diff --git a/src/datumaro/util/telemetry_utils.py b/src/datumaro/util/telemetry_utils.py index 71b6d76f95..4965062fc5 100644 --- a/src/datumaro/util/telemetry_utils.py +++ b/src/datumaro/util/telemetry_utils.py @@ -61,8 +61,6 @@ def _get_action_name(command): return "create_result" elif command is commands.compare.compare_command: return "diff_result" - elif command is commands.generate.generate_command: - return "generate_result" elif command is commands.info.info_command: return "info_result" elif command is commands.require_project.versioning.log.log_command: diff --git a/tests/unit/test_fractal_image_generator.py b/tests/unit/test_fractal_image_generator.py deleted file mode 100644 index d078c93895..0000000000 --- a/tests/unit/test_fractal_image_generator.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -import os.path as osp -from unittest import TestCase - -import numpy as np -import pytest - -from datumaro.plugins.synthetic_data import FractalImageGenerator -from datumaro.util.image import load_image - -from ..requirements import Requirements, mark_requirement - -from tests.utils.assets import get_test_asset_path -from tests.utils.test_utils import TestDir - - -@pytest.mark.xfail(reason="Cannot download the model file from the source") -class FractalImageGeneratorTest(TestCase): - @mark_requirement(Requirements.DATUM_677) - def test_save_image_can_create_dir(self): - with TestDir() as test_dir: - dataset_size = 2 - FractalImageGenerator(test_dir, dataset_size, shape=[22, 25]).generate_dataset() - image_files = os.listdir(test_dir) - self.assertEqual(len(image_files), dataset_size) - - for filename in image_files: - image = load_image(osp.join(test_dir, filename)) - H, W, C = image.shape - - self.assertEqual(H, 22) - self.assertEqual(W, 25) - self.assertEqual(C, 3) - - @mark_requirement(Requirements.DATUM_677) - def test_can_generate_image(self): - ref_dir = get_test_asset_path("synthetic_dataset", "images") - with TestDir() as test_dir: - dataset_size = 3 - FractalImageGenerator(test_dir, dataset_size, shape=[24, 36]).generate_dataset() - image_files = os.listdir(test_dir) - self.assertEqual(len(image_files), dataset_size) - - for filename in image_files: - actual = load_image(osp.join(test_dir, filename)) - expected = load_image(osp.join(ref_dir, filename)) - np.testing.assert_array_equal(actual, expected)