-
Notifications
You must be signed in to change notification settings - Fork 23
Expand file tree
/
Copy pathpyproject.toml
More file actions
125 lines (113 loc) · 3.21 KB
/
pyproject.toml
File metadata and controls
125 lines (113 loc) · 3.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
[build-system]
# The floor is 62.6 rather than 61.0 because [tool.setuptools.dynamic] loads
# `dependencies` through a `file` directive, which setuptools only accepts in
# pyproject.toml from release 62.6 onwards. Older backends (reachable with
# `--no-build-isolation`) would fail to build the project.
requires = ["setuptools>=62.6", "wheel"]
build-backend = "setuptools.build_meta"
[project]
# Core distribution metadata (PEP 621).
name = "dataflex"
authors = [
    { name = "Hao Liang", email = "hao.liang@stu.pku.edu.cn" },
]
description = "A data-centric training system for Large Language Models"
readme = { file = "README.md", content-type = "text/markdown" }
# Keep in sync with the per-version classifiers below and with
# `target-version` in [tool.ruff].
requires-python = ">=3.10"
license = { text = "Apache-2.0" }
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Education",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3 :: Only",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
keywords = [
    "AI",
    "data-centric",
    "LLM",
    "training",
    "data selection",
    "Data Mixture",
]
# These fields are not declared statically here; the build backend fills them
# in from the sources named in [tool.setuptools.dynamic].
dynamic = ["version", "dependencies"]
# Links published with the package metadata. Each key is the label shown for
# the link; "Bug Reports" is quoted because it contains a space.
[project.urls]
Github = "https://github.com/Open-DataFlow/DataFlex"
Documentation = "https://open-dataflow.github.io/DataFlex-Doc/"
"Bug Reports" = "https://github.com/Open-DataFlow/DataFlex/issues"
# Console entry points: installing the package creates a `dataflex-cli`
# command that invokes `main` from the `dataflex.cli` module.
[project.scripts]
dataflex-cli = "dataflex.cli:main"
# Optional feature sets ("extras"), installed with e.g.
# `pip install "dataflex[deepspeed,metrics]"`.
# Some combinations are declared mutually exclusive for uv; see
# `conflicts` under [tool.uv].
[project.optional-dependencies]
torch = ["torch>=2.0.0", "torchvision>=0.15.0"]
# torch and torch-npu are pinned to exact, matching 2.4.0 releases.
torch-npu = ["torch==2.4.0", "torch-npu==2.4.0.post2", "decorator"]
metrics = ["nltk", "jieba", "rouge-chinese"]
# Upper bounds below cap the newest release accepted by this project.
deepspeed = ["deepspeed>=0.10.0,<=0.16.9"]
liger-kernel = ["liger-kernel>=0.5.5"]
bitsandbytes = ["bitsandbytes>=0.39.0"]
hqq = ["hqq"]
eetq = ["eetq"]
gptq = ["optimum>=1.24.0", "gptqmodel>=2.0.0"]
aqlm = ["aqlm[gpu]>=1.1.0"]
vllm = ["vllm>=0.4.3,<=0.9.1"]
# This extra pins an exact transformers version alongside sglang.
sglang = ["sglang[srt]>=0.4.5", "transformers==4.51.1"]
galore = ["galore-torch"]
apollo = ["apollo-torch"]
badam = ["badam>=1.2.1"]
adam-mini = ["adam-mini"]
minicpm_v = [
    "soundfile",
    "torchvision",
    "torchaudio",
    "vector_quantize_pytorch",
    "vocos",
    "msgpack",
    "referencing",
    "jsonschema_specifications",
]
modelscope = ["modelscope"]
openmind = ["openmind"]
swanlab = ["swanlab"]
# Tooling for contributors: git hooks, lint/format, tests, and packaging.
dev = ["pre-commit", "ruff", "pytest", "build"]
# setuptools-specific build options.
[tool.setuptools]
# Include non-Python data files that live inside the discovered packages
# (as selected by MANIFEST.in or a setuptools VCS plugin).
include-package-data = true
# Automatic package discovery: only directories under `src/` are searched
# for importable packages.
[tool.setuptools.packages.find]
where = ["src"]
# Sources for the fields listed under `dynamic` in [project].
[tool.setuptools.dynamic]
# Read from the `__version__` attribute of the `dataflex.version` module.
version = { attr = "dataflex.version.__version__" }
# Read from requirements.txt, resolved relative to the project root.
dependencies = { file = "requirements.txt" }
# Ruff settings shared by the linter and the formatter.
[tool.ruff]
# Oldest Python syntax Ruff should assume; matches `requires-python` in [project].
target-version = "py310"
line-length = 119
indent-width = 4
[tool.ruff.lint]
# Enabled rule families: C (flake8-comprehensions + mccabe), E/W (pycodestyle),
# F (Pyflakes), I (isort).
select = ["C", "E", "F", "I", "W"]
# Individual rules switched off within the families selected above.
ignore = [
    "C408", # unnecessary-collection-call
    "C901", # complex-structure
    "E501", # line-too-long
    "E731", # lambda-assignment
    "E741", # ambiguous-variable-name
    "W605", # invalid-escape-sequence
]
# Import-sorting behaviour for the `I` rules.
[tool.ruff.lint.isort]
# Number of blank lines to leave after the import block.
lines-after-imports = 2
# Modules always classified as this project's own code.
known-first-party = ["dataflex"]
# Modules always classified as third-party, regardless of auto-detection.
known-third-party = [
    "accelerate",
    "datasets",
    "gradio",
    "numpy",
    "peft",
    "torch",
    "transformers",
    "trl",
]
# Settings for `ruff format`.
[tool.ruff.format]
quote-style = "double"
indent-style = "space"
# Also format code examples embedded in docstrings.
docstring-code-format = true
# false = keep honouring a trailing comma as a request to stay multi-line.
skip-magic-trailing-comma = false
# Detect the line ending per file instead of forcing LF or CRLF.
line-ending = "auto"
[tool.uv]
# Each inner list names extras that uv must treat as mutually exclusive:
# they are resolved separately and cannot be requested together.
# Every set here pairs `torch-npu` with one other extra.
# NOTE(review): presumably these extras need a torch build that clashes with
# the torch==2.4.0 pin in `torch-npu` — confirm before editing the list.
conflicts = [
    [{ extra = "torch-npu" }, { extra = "aqlm" }],
    [{ extra = "torch-npu" }, { extra = "liger-kernel" }],
    [{ extra = "torch-npu" }, { extra = "vllm" }],
]