diff --git a/openai/__init__.py b/openai/__init__.py index f80085eada..1a69e24485 100644 --- a/openai/__init__.py +++ b/openai/__init__.py @@ -3,16 +3,26 @@ # Originally forked from the MIT-licensed Stripe Python bindings. import os +import sys +from typing import TYPE_CHECKING, Optional + from contextvars import ContextVar -from typing import Optional, TYPE_CHECKING + +if "pkg_resources" not in sys.modules: + # workaround for the following: + # https://github.com/benoitc/gunicorn/pull/2539 + sys.modules["pkg_resources"] = object() # type: ignore[assignment] + import aiohttp + + del sys.modules["pkg_resources"] from openai.api_resources import ( Audio, ChatCompletion, Completion, Customer, - Edit, Deployment, + Edit, Embedding, Engine, ErrorObject, diff --git a/openai/api_resources/embedding.py b/openai/api_resources/embedding.py index 4eb97c68bf..e937636404 100644 --- a/openai/api_resources/embedding.py +++ b/openai/api_resources/embedding.py @@ -1,10 +1,10 @@ import base64 import time - from openai import util from openai.api_resources.abstract.engine_api_resource import EngineAPIResource -from openai.datalib import numpy as np, assert_has_numpy +from openai.datalib.numpy_helper import assert_has_numpy +from openai.datalib.numpy_helper import numpy as np from openai.error import TryAgain diff --git a/openai/datalib.py b/openai/datalib/common.py similarity index 67% rename from openai/datalib.py rename to openai/datalib/common.py index 2781cfc4db..678b3395fd 100644 --- a/openai/datalib.py +++ b/openai/datalib/common.py @@ -13,19 +13,6 @@ See also `setup.py`. """ -try: - import numpy -except ImportError: - numpy = None - -try: - import pandas -except ImportError: - pandas = None - -HAS_NUMPY = bool(numpy) -HAS_PANDAS = bool(pandas) - INSTRUCTIONS = """ OpenAI error: @@ -39,18 +26,7 @@ """ NUMPY_INSTRUCTIONS = INSTRUCTIONS.format(library="numpy") -PANDAS_INSTRUCTIONS = INSTRUCTIONS.format(library="pandas") class MissingDependencyError(Exception): pass - - -def assert_has_numpy(): - if not HAS_NUMPY: - raise MissingDependencyError(NUMPY_INSTRUCTIONS) - - -def assert_has_pandas(): - if not HAS_PANDAS: - raise MissingDependencyError(PANDAS_INSTRUCTIONS) diff --git a/openai/datalib/numpy_helper.py b/openai/datalib/numpy_helper.py new file mode 100644 index 0000000000..fb80f2ae54 --- /dev/null +++ b/openai/datalib/numpy_helper.py @@ -0,0 +1,15 @@ +from openai.datalib.common import INSTRUCTIONS, MissingDependencyError + +try: + import numpy +except ImportError: + numpy = None + +HAS_NUMPY = bool(numpy) + +NUMPY_INSTRUCTIONS = INSTRUCTIONS.format(library="numpy") + + +def assert_has_numpy(): + if not HAS_NUMPY: + raise MissingDependencyError(NUMPY_INSTRUCTIONS) diff --git a/openai/datalib/pandas_helper.py b/openai/datalib/pandas_helper.py new file mode 100644 index 0000000000..4e86d7b4f9 --- /dev/null +++ b/openai/datalib/pandas_helper.py @@ -0,0 +1,15 @@ +from openai.datalib.common import INSTRUCTIONS, MissingDependencyError + +try: + import pandas +except ImportError: + pandas = None + +HAS_PANDAS = bool(pandas) + +PANDAS_INSTRUCTIONS = INSTRUCTIONS.format(library="pandas") + + +def assert_has_pandas(): + if not HAS_PANDAS: + raise MissingDependencyError(PANDAS_INSTRUCTIONS) diff --git a/openai/embeddings_utils.py b/openai/embeddings_utils.py index 056c2065c1..08fa94c2ea 100644 --- a/openai/embeddings_utils.py +++ b/openai/embeddings_utils.py @@ -10,8 +10,8 @@ from tenacity import retry, stop_after_attempt, wait_random_exponential import openai -from openai.datalib import numpy as np -from openai.datalib import pandas as pd +from openai.datalib.numpy_helper import numpy as np +from openai.datalib.pandas_helper import pandas as pd @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(6)) diff --git a/openai/tests/test_long_examples_validator.py b/openai/tests/test_long_examples_validator.py index 0cac1360cd..949a7cbbae 100644 --- a/openai/tests/test_long_examples_validator.py +++ b/openai/tests/test_long_examples_validator.py @@ -4,12 +4,8 @@ import pytest -from openai.datalib import ( - HAS_NUMPY, - HAS_PANDAS, - NUMPY_INSTRUCTIONS, - PANDAS_INSTRUCTIONS, -) +from openai.datalib.numpy_helper import HAS_NUMPY, NUMPY_INSTRUCTIONS +from openai.datalib.pandas_helper import HAS_PANDAS, PANDAS_INSTRUCTIONS @pytest.mark.skipif(not HAS_PANDAS, reason=PANDAS_INSTRUCTIONS) @@ -54,5 +50,5 @@ def test_long_examples_validator() -> None: assert prepared_data_cmd_output.stderr == "" # validate get_long_indexes() applied during optional_fn() call in long_examples_validator() assert "indices of the long examples has changed" in prepared_data_cmd_output.stdout - + return prepared_data_cmd_output.stdout diff --git a/openai/validators.py b/openai/validators.py index b15e59bee3..97083659df 100644 --- a/openai/validators.py +++ b/openai/validators.py @@ -2,7 +2,8 @@ import sys from typing import Any, Callable, NamedTuple, Optional -from openai.datalib import pandas as pd, assert_has_pandas +from openai.datalib.pandas_helper import assert_has_pandas +from openai.datalib.pandas_helper import pandas as pd class Remediation(NamedTuple): @@ -158,6 +159,7 @@ def long_examples_validator(df): ft_type = infer_task_type(df) if ft_type != "open-ended generation": + def get_long_indexes(d): long_examples = d.apply( lambda x: len(x.prompt) + len(x.completion) > 10000, axis=1 @@ -171,10 +173,12 @@ def get_long_indexes(d): optional_msg = f"Remove {len(long_indexes)} long examples" def optional_fn(x): - + long_indexes_to_drop = get_long_indexes(x) if long_indexes != long_indexes_to_drop: - sys.stdout.write(f"The indices of the long examples has changed as a result of a previously applied recommendation.\nThe {len(long_indexes_to_drop)} long examples to be dropped are now at the following indices: {long_indexes_to_drop}\n") + sys.stdout.write( + f"The indices of the long examples has changed as a result of a previously applied recommendation.\nThe {len(long_indexes_to_drop)} long examples to be dropped are now at the following indices: {long_indexes_to_drop}\n" + ) return x.drop(long_indexes_to_drop) return Remediation( diff --git a/openai/wandb_logger.py b/openai/wandb_logger.py index ba650d1fe4..fdd8c24adc 100644 --- a/openai/wandb_logger.py +++ b/openai/wandb_logger.py @@ -14,8 +14,8 @@ from pathlib import Path from openai import File, FineTune - from openai.datalib import numpy as np - from openai.datalib import pandas as pd + from openai.datalib.numpy_helper import numpy as np + from openai.datalib.pandas_helper import pandas as pd class WandbLogger: