Skip to content

Commit 45ca931

Browse files
committed
[ENH] Add local support for schema
1 parent f60a76e commit 45ca931

File tree

14 files changed

+172
-67
lines changed

14 files changed

+172
-67
lines changed

chromadb/api/rust.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,7 @@ def list_collections(
193193
CollectionModel(
194194
id=collection.id,
195195
name=collection.name,
196-
serialized_schema=None,
196+
serialized_schema=collection.schema,
197197
configuration_json=collection.configuration,
198198
metadata=collection.metadata,
199199
dimension=collection.dimension,
@@ -229,14 +229,25 @@ def create_collection(
229229
else:
230230
configuration_json_str = None
231231

232+
if schema:
233+
schema_str = json.dumps(schema.serialize_to_json())
234+
else:
235+
schema_str = None
236+
232237
collection = self.bindings.create_collection(
233-
name, configuration_json_str, metadata, get_or_create, tenant, database
238+
name,
239+
configuration_json_str,
240+
schema_str,
241+
metadata,
242+
get_or_create,
243+
tenant,
244+
database,
234245
)
235246
collection_model = CollectionModel(
236247
id=collection.id,
237248
name=collection.name,
238249
configuration_json=collection.configuration,
239-
serialized_schema=None,
250+
serialized_schema=collection.schema,
240251
metadata=collection.metadata,
241252
dimension=collection.dimension,
242253
tenant=collection.tenant,
@@ -256,7 +267,7 @@ def get_collection(
256267
id=collection.id,
257268
name=collection.name,
258269
configuration_json=collection.configuration,
259-
serialized_schema=None,
270+
serialized_schema=collection.schema,
260271
metadata=collection.metadata,
261272
dimension=collection.dimension,
262273
tenant=collection.tenant,

chromadb/chromadb_rust_bindings.pyi

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ class Bindings:
9595
self,
9696
name: str,
9797
configuration_json_str: Optional[str] = None,
98+
schema_str: Optional[str] = None,
9899
metadata: Optional[CollectionMetadata] = None,
99100
get_or_create: bool = False,
100101
tenant: str = DEFAULT_TENANT,

rust/frontend/src/config.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ fn default_enable_span_indexing() -> bool {
142142
}
143143

144144
fn default_enable_schema() -> bool {
145-
false
145+
true
146146
}
147147

148148
pub fn default_min_records_for_task() -> u64 {

rust/frontend/src/get_collection_with_segments_provider.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ use chroma_config::Configurable;
44
use chroma_error::{ChromaError, ErrorCodes};
55
use chroma_sysdb::SysDb;
66
use chroma_types::{
7-
CollectionAndSegments, CollectionUuid, GetCollectionWithSegmentsError, Schema, SchemaError,
7+
CollectionAndSegments, CollectionUuid, GetCollectionWithSegmentsError, KnnIndex, Schema,
8+
SchemaError,
89
};
910
use serde::{Deserialize, Serialize};
1011
use std::{
@@ -142,6 +143,7 @@ impl CollectionsWithSegmentsProvider {
142143
pub(crate) async fn get_collection_with_segments(
143144
&mut self,
144145
collection_id: CollectionUuid,
146+
knn_index: KnnIndex,
145147
) -> Result<CollectionAndSegments, CollectionsWithSegmentsProviderError> {
146148
if let Some(collection_and_segments_with_ttl) = self
147149
.collections_with_segments_cache
@@ -187,6 +189,7 @@ impl CollectionsWithSegmentsProvider {
187189
let reconciled_schema = Schema::reconcile_schema_and_config(
188190
collection_and_segments_sysdb.collection.schema.as_ref(),
189191
Some(&collection_and_segments_sysdb.collection.config),
192+
knn_index,
190193
)
191194
.map_err(CollectionsWithSegmentsProviderError::InvalidSchema)?;
192195
collection_and_segments_sysdb.collection.schema = Some(reconciled_schema);

rust/frontend/src/impls/service_based_frontend.rs

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ impl ServiceBasedFrontend {
176176
) -> Result<Collection, GetCollectionError> {
177177
Ok(self
178178
.collections_with_segments_provider
179-
.get_collection_with_segments(collection_id)
179+
.get_collection_with_segments(collection_id, self.default_knn_index)
180180
.await
181181
.map_err(|err| Box::new(err) as Box<dyn ChromaError>)?
182182
.collection)
@@ -188,7 +188,7 @@ impl ServiceBasedFrontend {
188188
) -> Result<Option<u32>, GetCollectionError> {
189189
Ok(self
190190
.collections_with_segments_provider
191-
.get_collection_with_segments(collection_id)
191+
.get_collection_with_segments(collection_id, self.default_knn_index)
192192
.await
193193
.map_err(|err| Box::new(err) as Box<dyn ChromaError>)?
194194
.collection
@@ -381,7 +381,7 @@ impl ServiceBasedFrontend {
381381
if self.enable_schema {
382382
for collection in collections.iter_mut() {
383383
collection
384-
.reconcile_schema_with_config()
384+
.reconcile_schema_with_config(self.default_knn_index)
385385
.map_err(GetCollectionsError::InvalidSchema)?;
386386
}
387387
}
@@ -425,7 +425,7 @@ impl ServiceBasedFrontend {
425425
if self.enable_schema {
426426
for collection in &mut collections {
427427
collection
428-
.reconcile_schema_with_config()
428+
.reconcile_schema_with_config(self.default_knn_index)
429429
.map_err(GetCollectionError::InvalidSchema)?;
430430
}
431431
}
@@ -450,7 +450,7 @@ impl ServiceBasedFrontend {
450450

451451
if self.enable_schema {
452452
collection
453-
.reconcile_schema_with_config()
453+
.reconcile_schema_with_config(self.default_knn_index)
454454
.map_err(GetCollectionByCrnError::InvalidSchema)?;
455455
}
456456
Ok(collection)
@@ -517,6 +517,7 @@ impl ServiceBasedFrontend {
517517
match Schema::reconcile_schema_and_config(
518518
schema.as_ref(),
519519
config_for_reconcile.as_ref(),
520+
self.default_knn_index,
520521
) {
521522
Ok(schema) => Some(schema),
522523
Err(e) => {
@@ -616,7 +617,7 @@ impl ServiceBasedFrontend {
616617
// that was retrieved from sysdb, rather than the one that was passed in
617618
if self.enable_schema {
618619
collection
619-
.reconcile_schema_with_config()
620+
.reconcile_schema_with_config(self.default_knn_index)
620621
.map_err(CreateCollectionError::InvalidSchema)?;
621622
}
622623
Ok(collection)
@@ -721,7 +722,7 @@ impl ServiceBasedFrontend {
721722
.await?;
722723
collection_and_segments
723724
.collection
724-
.reconcile_schema_with_config()
725+
.reconcile_schema_with_config(self.default_knn_index)
725726
.map_err(ForkCollectionError::InvalidSchema)?;
726727
let collection = collection_and_segments.collection.clone();
727728
let latest_collection_logical_size_bytes = collection_and_segments
@@ -1085,7 +1086,7 @@ impl ServiceBasedFrontend {
10851086
let read_event = if let Some(where_clause) = r#where {
10861087
let collection_and_segments = self
10871088
.collections_with_segments_provider
1088-
.get_collection_with_segments(collection_id)
1089+
.get_collection_with_segments(collection_id, self.default_knn_index)
10891090
.await
10901091
.map_err(|err| Box::new(err) as Box<dyn ChromaError>)?;
10911092
if self.enable_schema {
@@ -1295,7 +1296,7 @@ impl ServiceBasedFrontend {
12951296
) -> Result<CountResponse, QueryError> {
12961297
let collection_and_segments = self
12971298
.collections_with_segments_provider
1298-
.get_collection_with_segments(collection_id)
1299+
.get_collection_with_segments(collection_id, self.default_knn_index)
12991300
.await
13001301
.map_err(|err| Box::new(err) as Box<dyn ChromaError>)?;
13011302
let latest_collection_logical_size_bytes = collection_and_segments
@@ -1410,7 +1411,7 @@ impl ServiceBasedFrontend {
14101411
) -> Result<GetResponse, QueryError> {
14111412
let collection_and_segments = self
14121413
.collections_with_segments_provider
1413-
.get_collection_with_segments(collection_id)
1414+
.get_collection_with_segments(collection_id, self.default_knn_index)
14141415
.await
14151416
.map_err(|err| Box::new(err) as Box<dyn ChromaError>)?;
14161417
if self.enable_schema {
@@ -1555,7 +1556,7 @@ impl ServiceBasedFrontend {
15551556
) -> Result<QueryResponse, QueryError> {
15561557
let collection_and_segments = self
15571558
.collections_with_segments_provider
1558-
.get_collection_with_segments(collection_id)
1559+
.get_collection_with_segments(collection_id, self.default_knn_index)
15591560
.await
15601561
.map_err(|err| Box::new(err) as Box<dyn ChromaError>)?;
15611562
if self.enable_schema {
@@ -1712,7 +1713,7 @@ impl ServiceBasedFrontend {
17121713
// Get collection and segments once for all queries
17131714
let collection_and_segments = self
17141715
.collections_with_segments_provider
1715-
.get_collection_with_segments(request.collection_id)
1716+
.get_collection_with_segments(request.collection_id, self.default_knn_index)
17161717
.await
17171718
.map_err(|err| QueryError::Other(Box::new(err) as Box<dyn ChromaError>))?;
17181719
if self.enable_schema {

rust/python_bindings/src/bindings.rs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ impl Bindings {
113113
let executor_config = ExecutorConfig::Local(LocalExecutorConfig {});
114114

115115
let knn_index = KnnIndex::Hnsw;
116-
let enable_schema = false;
116+
let enable_schema = true;
117117

118118
let frontend_config = FrontendConfig {
119119
allow_reset,
@@ -252,22 +252,22 @@ impl Bindings {
252252

253253
#[allow(clippy::too_many_arguments)]
254254
#[pyo3(
255-
signature = (name, configuration_json_str, metadata = None, get_or_create = false, tenant = DEFAULT_TENANT.to_string(), database = DEFAULT_DATABASE.to_string())
255+
signature = (name, configuration_json_str = None, schema_str = None, metadata = None, get_or_create = false, tenant = DEFAULT_TENANT.to_string(), database = DEFAULT_DATABASE.to_string())
256256
)]
257257
fn create_collection(
258258
&self,
259259
name: String,
260260
configuration_json_str: Option<String>,
261+
schema_str: Option<String>,
261262
metadata: Option<Metadata>,
262263
get_or_create: bool,
263264
tenant: String,
264265
database: String,
265266
) -> ChromaPyResult<Collection> {
266267
let configuration_json = match configuration_json_str {
267268
Some(configuration_json_str) => {
268-
let configuration_json =
269-
serde_json::from_str::<CollectionConfiguration>(&configuration_json_str)
270-
.map_err(WrappedSerdeJsonError::SerdeJsonError)?;
269+
let configuration_json = serde_json::from_str(&configuration_json_str)
270+
.map_err(WrappedSerdeJsonError::SerdeJsonError)?;
271271

272272
Some(configuration_json)
273273
}
@@ -291,13 +291,20 @@ impl Bindings {
291291
)?),
292292
};
293293

294+
let schema = match schema_str {
295+
Some(schema_str) => {
296+
serde_json::from_str(&schema_str).map_err(WrappedSerdeJsonError::SerdeJsonError)?
297+
}
298+
None => None,
299+
};
300+
294301
let request = CreateCollectionRequest::try_new(
295302
tenant,
296303
database,
297304
name,
298305
metadata,
299306
configuration,
300-
None,
307+
schema,
301308
get_or_create,
302309
)?;
303310

rust/segment/src/distributed_spann.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ use chroma_index::spann::types::{
1515
use chroma_index::IndexUuid;
1616
use chroma_index::{hnsw_provider::HnswIndexProvider, spann::types::SpannIndexWriter};
1717
use chroma_types::Collection;
18+
use chroma_types::KnnIndex;
1819
use chroma_types::Schema;
1920
use chroma_types::SchemaError;
2021
use chroma_types::SegmentUuid;
@@ -114,6 +115,7 @@ impl SpannSegmentWriter {
114115
let reconciled_schema = Schema::reconcile_schema_and_config(
115116
collection.schema.as_ref(),
116117
Some(&collection.config),
118+
KnnIndex::Spann,
117119
)
118120
.map_err(SpannSegmentWriterError::InvalidSchema)?;
119121

@@ -619,8 +621,8 @@ mod test {
619621
use chroma_storage::{local::LocalStorage, Storage};
620622
use chroma_types::{
621623
Chunk, Collection, CollectionUuid, DatabaseUuid, InternalCollectionConfiguration,
622-
InternalSpannConfiguration, LogRecord, Operation, OperationRecord, Schema, SegmentUuid,
623-
SpannPostingList,
624+
InternalSpannConfiguration, KnnIndex, LogRecord, Operation, OperationRecord, Schema,
625+
SegmentUuid, SpannPostingList,
624626
};
625627

626628
use crate::{
@@ -688,7 +690,7 @@ mod test {
688690
..Default::default()
689691
};
690692
collection.schema = Some(
691-
Schema::reconcile_schema_and_config(None, Some(&collection.config))
693+
Schema::reconcile_schema_and_config(None, Some(&collection.config), KnnIndex::Spann)
692694
.expect("Error reconciling schema for test collection"),
693695
);
694696

@@ -925,7 +927,7 @@ mod test {
925927
..Default::default()
926928
};
927929
collection.schema = Some(
928-
Schema::reconcile_schema_and_config(None, Some(&collection.config))
930+
Schema::reconcile_schema_and_config(None, Some(&collection.config), KnnIndex::Spann)
929931
.expect("Error reconciling schema for test collection"),
930932
);
931933

@@ -1087,7 +1089,7 @@ mod test {
10871089
..Default::default()
10881090
};
10891091
collection.schema = Some(
1090-
Schema::reconcile_schema_and_config(None, Some(&collection.config))
1092+
Schema::reconcile_schema_and_config(None, Some(&collection.config), KnnIndex::Spann)
10911093
.expect("Error reconciling schema for test collection"),
10921094
);
10931095

@@ -1218,7 +1220,7 @@ mod test {
12181220
..Default::default()
12191221
};
12201222
collection.schema = Some(
1221-
Schema::reconcile_schema_and_config(None, Some(&collection.config))
1223+
Schema::reconcile_schema_and_config(None, Some(&collection.config), KnnIndex::Spann)
12221224
.expect("Error reconciling schema for test collection"),
12231225
);
12241226

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- Stores the collection schema as stringified JSON
2+
ALTER TABLE collections ADD COLUMN schema_str TEXT;

rust/sqlite/src/table.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ pub enum Collections {
4343
Dimension,
4444
DatabaseId,
4545
ConfigJsonStr,
46+
SchemaStr,
4647
}
4748

4849
#[derive(Iden)]

0 commit comments

Comments
 (0)