Skip to content

Commit 6754f68

Browse files
authored
Minimize bindgen type information (#1260)
This commit is aimed at addressing bytecodealliance/wasmtime#7266 as well as a preexisting wart in wit-component. Guest bindings generators use `wit-component::metadata` to embed type information in compiled wasm modules. Currently this creates a custom section with a custom binary format. The binary format has a few prefix bytes/strings and then has a wasm-encoded WIT package with the world that was bound. The problem with this, as shown in the previous issue, is that the WIT package is possibly much larger than the world that was bound, meaning that it has a lot more type information than necessary. This means that incompatibilities in interfaces that weren't actually used cause errors, which is annoying to keep in sync and technically not necessary to warn/error about. This commit aims to solve two separate problems at the same time: * Type information will now be minimized, storing only exactly what's needed for the bound world. * The custom section format will become an actual component without any extra headers to make extraction/debugging easier. Bindings generators currently all operate on worlds as the unit of generation, meaning that the metadata to include in this custom section is effectively a WIT world. Conveniently, a WIT world can be exactly represented as a WebAssembly component type, and doubly conveniently there's already an `encode_world` function. This means that the custom section format for bindings generators is now a component which exports a single component type. The component type represents the world that was bound as part of the bindings generation process. Other minor details such as version information and string encoding selection are now stored in a custom section of the component created instead of in the header. Various guards are still in place against future versions, but this should help make the format more easily debuggable and more understandable by being "simply a component" with some extra metadata. 
The decoding process back into a WIT world needed a few minor updates for various reasons, but otherwise this integrated well into the existing structure of `wit-component`. As with other breaking changes coming into components at this point, all of this new support is disabled behind an environment variable by default. The hope is to release this to get support for the new format in a number of places and then switch over to the new format by default. Eventually down the road the old format will be deleted.
1 parent 22402e3 commit 6754f68

9 files changed

Lines changed: 361 additions & 135 deletions

File tree

crates/wasm-encoder/src/component/builder.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,12 @@ impl ComponentBuilder {
372372
inc(&mut self.core_funcs)
373373
}
374374

375+
/// Adds a new custom section to this component.
376+
pub fn custom_section(&mut self, section: &CustomSection<'_>) {
377+
self.flush();
378+
self.component.section(section);
379+
}
380+
375381
/// Adds a new custom section, given as raw bytes, to this component.
376382
pub fn raw_custom_section(&mut self, section: &[u8]) {
377383
self.flush();

crates/wit-component/src/decoding.rs

Lines changed: 108 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -123,16 +123,7 @@ impl<'a> ComponentInfo<'a> {
123123
}
124124

125125
fn decode_wit_v1_package(&self) -> Result<(Resolve, PackageId)> {
126-
let resolve = Resolve::default();
127-
let mut decoder = WitPackageDecoder {
128-
resolve,
129-
info: self,
130-
type_map: HashMap::new(),
131-
foreign_packages: Default::default(),
132-
iface_to_package_index: Default::default(),
133-
named_interfaces: Default::default(),
134-
resources: Default::default(),
135-
};
126+
let mut decoder = WitPackageDecoder::new(&self.types);
136127

137128
let mut pkg = None;
138129
for (name, item) in self.externs.iter() {
@@ -162,16 +153,7 @@ impl<'a> ComponentInfo<'a> {
162153
}
163154

164155
fn decode_wit_v2_package(&self) -> Result<(Resolve, PackageId)> {
165-
let resolve = Resolve::default();
166-
let mut decoder = WitPackageDecoder {
167-
resolve,
168-
info: self,
169-
type_map: HashMap::new(),
170-
foreign_packages: Default::default(),
171-
iface_to_package_index: Default::default(),
172-
named_interfaces: Default::default(),
173-
resources: Default::default(),
174-
};
156+
let mut decoder = WitPackageDecoder::new(&self.types);
175157

176158
let mut pkg_name = None;
177159

@@ -252,12 +234,12 @@ impl<'a> ComponentInfo<'a> {
252234

253235
fn decode_component(&self) -> Result<(Resolve, WorldId)> {
254236
assert!(self.is_wit_package().is_none());
255-
let mut resolve = Resolve::default();
237+
let mut decoder = WitPackageDecoder::new(&self.types);
256238
// Note that this name is arbitrarily chosen. We may one day perhaps
257239
// want to encode this in the component binary format itself, but for
258240
// now it shouldn't be an issue to have a defaulted name here.
259241
let world_name = "root";
260-
let world = resolve.worlds.alloc(World {
242+
let world = decoder.resolve.worlds.alloc(World {
261243
name: world_name.to_string(),
262244
docs: Default::default(),
263245
imports: Default::default(),
@@ -266,15 +248,6 @@ impl<'a> ComponentInfo<'a> {
266248
includes: Default::default(),
267249
include_names: Default::default(),
268250
});
269-
let mut decoder = WitPackageDecoder {
270-
resolve,
271-
info: self,
272-
type_map: HashMap::new(),
273-
foreign_packages: Default::default(),
274-
iface_to_package_index: Default::default(),
275-
named_interfaces: Default::default(),
276-
resources: Default::default(),
277-
};
278251
let mut package = Package {
279252
// Similar to `world_name` above this is arbitrarily chosen as it's
280253
// not otherwise encoded in a binary component. This theoretically
@@ -371,14 +344,45 @@ pub fn decode(bytes: &[u8]) -> Result<DecodedWasm> {
371344
}
372345
}
373346

347+
/// Decodes the single component type `world` specified as a WIT world.
348+
///
349+
/// The `name` provided should be a full ID such as `foo:bar/baz`.
350+
pub(crate) fn decode_world(
351+
types: &types::Types,
352+
name: &str,
353+
world: types::ComponentTypeId,
354+
) -> Result<(Resolve, WorldId)> {
355+
let mut decoder = WitPackageDecoder::new(types);
356+
let mut interfaces = IndexMap::new();
357+
let mut worlds = IndexMap::new();
358+
let name = decoder.decode_world(
359+
name,
360+
&types[world],
361+
&mut PackageFields {
362+
interfaces: &mut interfaces,
363+
worlds: &mut worlds,
364+
},
365+
)?;
366+
let (resolve, pkg) = decoder.finish(Package {
367+
name,
368+
interfaces,
369+
worlds,
370+
docs: Default::default(),
371+
});
372+
// The package decoded here should only have a single world so extract that
373+
// here to return.
374+
let world = *resolve.packages[pkg].worlds.iter().next().unwrap().1;
375+
Ok((resolve, world))
376+
}
377+
374378
struct PackageFields<'a> {
375379
interfaces: &'a mut IndexMap<String, InterfaceId>,
376380
worlds: &'a mut IndexMap<String, WorldId>,
377381
}
378382

379383
struct WitPackageDecoder<'a> {
380384
resolve: Resolve,
381-
info: &'a ComponentInfo<'a>,
385+
types: &'a types::Types,
382386
foreign_packages: IndexMap<String, Package>,
383387
iface_to_package_index: HashMap<InterfaceId, usize>,
384388
named_interfaces: HashMap<String, InterfaceId>,
@@ -397,6 +401,18 @@ struct WitPackageDecoder<'a> {
397401
}
398402

399403
impl WitPackageDecoder<'_> {
404+
fn new<'a>(types: &'a types::Types) -> WitPackageDecoder<'a> {
405+
WitPackageDecoder {
406+
resolve: Resolve::default(),
407+
types,
408+
type_map: HashMap::new(),
409+
foreign_packages: Default::default(),
410+
iface_to_package_index: Default::default(),
411+
named_interfaces: Default::default(),
412+
resources: Default::default(),
413+
}
414+
}
415+
400416
fn decode_v1_package(
401417
&mut self,
402418
name: &ComponentName,
@@ -406,7 +422,7 @@ impl WitPackageDecoder<'_> {
406422
// importing from remote packages.
407423
for (name, ty) in ty.imports.iter() {
408424
let ty = match ty {
409-
types::ComponentEntityType::Instance(idx) => &self.info.types[*idx],
425+
types::ComponentEntityType::Instance(idx) => &self.types[*idx],
410426
_ => bail!("import `{name}` is not an instance"),
411427
};
412428
self.register_import(name, ty)
@@ -436,12 +452,12 @@ impl WitPackageDecoder<'_> {
436452
for (name, ty) in ty.exports.iter() {
437453
match ty {
438454
types::ComponentEntityType::Instance(idx) => {
439-
let ty = &self.info.types[*idx];
455+
let ty = &self.types[*idx];
440456
self.register_interface(name.as_str(), ty, &mut fields)
441457
.with_context(|| format!("failed to process export `{name}`"))?;
442458
}
443459
types::ComponentEntityType::Component(idx) => {
444-
let ty = &self.info.types[*idx];
460+
let ty = &self.types[*idx];
445461
self.register_world(name.as_str(), ty, &mut fields)
446462
.with_context(|| format!("failed to process export `{name}`"))?;
447463
}
@@ -469,7 +485,7 @@ impl WitPackageDecoder<'_> {
469485

470486
for (name, ty) in imports.iter() {
471487
let ty = match ty {
472-
types::ComponentEntityType::Instance(idx) => &self.info.types[*idx],
488+
types::ComponentEntityType::Instance(idx) => &self.types[*idx],
473489
_ => bail!("import `{name}` is not an instance"),
474490
};
475491
self.register_import(name, ty)
@@ -510,14 +526,13 @@ impl WitPackageDecoder<'_> {
510526
let name = import.name.0;
511527
log::debug!("decoding component import `{name}`");
512528
let ty = self
513-
.info
514529
.types
515530
.component_entity_type_of_import(name)
516531
.unwrap();
517532
let owner = TypeOwner::World(world);
518533
let (name, item) = match ty {
519534
types::ComponentEntityType::Instance(i) => {
520-
let ty = &self.info.types[i];
535+
let ty = &self.types[i];
521536
let (name, id) = if name.contains('/') {
522537
let id = self.register_import(name, ty)?;
523538
(WorldKey::Interface(id), id)
@@ -528,7 +543,7 @@ impl WitPackageDecoder<'_> {
528543
(name, WorldItem::Interface(id))
529544
}
530545
types::ComponentEntityType::Func(i) => {
531-
let ty = &self.info.types[i];
546+
let ty = &self.types[i];
532547
let func = self
533548
.convert_function(name, ty, owner)
534549
.with_context(|| format!("failed to decode function from import `{name}`"))?;
@@ -558,7 +573,7 @@ impl WitPackageDecoder<'_> {
558573
) -> Result<()> {
559574
let name = export.name.0;
560575
log::debug!("decoding component export `{name}`");
561-
let types = &self.info.types;
576+
let types = &self.types;
562577
let ty = types.component_entity_type_of_export(name).unwrap();
563578
let (name, item) = match ty {
564579
types::ComponentEntityType::Func(i) => {
@@ -640,7 +655,7 @@ impl WitPackageDecoder<'_> {
640655
log::debug!("type already exist");
641656
match referenced {
642657
types::ComponentAnyTypeId::Defined(ty) => {
643-
self.register_defined(id, &self.info.types[ty])?;
658+
self.register_defined(id, &self.types[ty])?;
644659
}
645660
types::ComponentAnyTypeId::Resource(_) => {}
646661
_ => unreachable!(),
@@ -684,7 +699,7 @@ impl WitPackageDecoder<'_> {
684699
// functions for remote dependencies and otherwise assert
685700
// they're already defined for local dependencies.
686701
types::ComponentEntityType::Func(ty) => {
687-
let def = &self.info.types[ty];
702+
let def = &self.types[ty];
688703
if self.resolve.interfaces[interface]
689704
.functions
690705
.contains_key(name.as_str())
@@ -721,7 +736,7 @@ impl WitPackageDecoder<'_> {
721736
let mut cur = id;
722737
while prev.is_none() {
723738
prev = self.type_map.get(&cur).copied();
724-
cur = match self.info.types.peel_alias(cur) {
739+
cur = match self.types.peel_alias(cur) {
725740
Some(next) => next,
726741
None => break,
727742
};
@@ -829,7 +844,7 @@ impl WitPackageDecoder<'_> {
829844
}
830845

831846
types::ComponentEntityType::Func(ty) => {
832-
let ty = &self.info.types[ty];
847+
let ty = &self.types[ty];
833848
let func = self
834849
.convert_function(name.as_str(), ty, owner)
835850
.with_context(|| format!("failed to convert function '{name}'"))?;
@@ -895,7 +910,7 @@ impl WitPackageDecoder<'_> {
895910
log::debug!("type export for `{name}` is a new type");
896911
match referenced {
897912
types::ComponentAnyTypeId::Defined(ty) => self
898-
.convert_defined(&self.info.types[ty])
913+
.convert_defined(&self.types[ty])
899914
.context("failed to convert unaliased type")?,
900915
types::ComponentAnyTypeId::Resource(_) => TypeDefKind::Resource,
901916
_ => unreachable!(),
@@ -949,7 +964,7 @@ impl WitPackageDecoder<'_> {
949964
for (name, ty) in ty.imports.iter() {
950965
let (name, item) = match ty {
951966
types::ComponentEntityType::Instance(idx) => {
952-
let ty = &self.info.types[*idx];
967+
let ty = &self.types[*idx];
953968
let (name, id) = if name.contains('/') {
954969
// If a name is an interface import then it is either to
955970
// a package-local or foreign interface, and both
@@ -974,7 +989,7 @@ impl WitPackageDecoder<'_> {
974989
(WorldKey::Name(name.to_string()), WorldItem::Type(ty))
975990
}
976991
types::ComponentEntityType::Func(idx) => {
977-
let ty = &self.info.types[*idx];
992+
let ty = &self.types[*idx];
978993
let func = self.convert_function(name.as_str(), ty, owner)?;
979994
(WorldKey::Name(name.to_string()), WorldItem::Function(func))
980995
}
@@ -986,7 +1001,7 @@ impl WitPackageDecoder<'_> {
9861001
for (name, ty) in ty.exports.iter() {
9871002
let (name, item) = match ty {
9881003
types::ComponentEntityType::Instance(idx) => {
989-
let ty = &self.info.types[*idx];
1004+
let ty = &self.types[*idx];
9901005
let (name, id) = if name.contains('/') {
9911006
// Note that despite this being an export this is
9921007
// calling `register_import`. With a URL this interface
@@ -1003,7 +1018,7 @@ impl WitPackageDecoder<'_> {
10031018
}
10041019

10051020
types::ComponentEntityType::Func(idx) => {
1006-
let ty = &self.info.types[*idx];
1021+
let ty = &self.types[*idx];
10071022
let func = self.convert_function(name.as_str(), ty, owner)?;
10081023
(WorldKey::Name(name.to_string()), WorldItem::Function(func))
10091024
}
@@ -1097,7 +1112,7 @@ impl WitPackageDecoder<'_> {
10971112
// errors on those types, but eventually the `bail!` here is
10981113
// more-or-less unreachable due to expected validation to be added to
10991114
// the component model binary format itself.
1100-
let def = &self.info.types[id];
1115+
let def = &self.types[id];
11011116
let kind = self.convert_defined(def)?;
11021117
match &kind {
11031118
TypeDefKind::Type(_)
@@ -1262,7 +1277,7 @@ impl WitPackageDecoder<'_> {
12621277

12631278
fn register_defined(&mut self, id: TypeId, def: &types::ComponentDefinedType) -> Result<()> {
12641279
Registrar {
1265-
types: &self.info.types,
1280+
types: &self.types,
12661281
type_map: &mut self.type_map,
12671282
resolve: &self.resolve,
12681283
}
@@ -1312,25 +1327,56 @@ impl WitPackageDecoder<'_> {
13121327
}
13131328

13141329
fn insert_package(&mut self, package: Package) -> PackageId {
1315-
let name = package.name.clone();
1316-
let id = self.resolve.packages.alloc(package);
1317-
let prev = self.resolve.package_names.insert(name, id);
1318-
assert!(prev.is_none());
1319-
for (_, iface) in self.resolve.packages[id].interfaces.iter() {
1320-
self.resolve.interfaces[*iface].package = Some(id);
1330+
let Package {
1331+
name,
1332+
interfaces,
1333+
worlds,
1334+
docs,
1335+
} = package;
1336+
1337+
// Most of the time the `package` being inserted is not already present
1338+
// in `self.resolve`, but the top-level `decode_world` function is an
1339+
// exception to this. This shouldn't in general be a problem
1340+
// so union-up the packages here while asserting that nothing gets
1341+
// replaced by accident which would indicate a bug.
1342+
let pkg = self
1343+
.resolve
1344+
.package_names
1345+
.get(&name)
1346+
.copied()
1347+
.unwrap_or_else(|| {
1348+
let id = self.resolve.packages.alloc(Package {
1349+
name: name.clone(),
1350+
interfaces: Default::default(),
1351+
worlds: Default::default(),
1352+
docs,
1353+
});
1354+
let prev = self.resolve.package_names.insert(name, id);
1355+
assert!(prev.is_none());
1356+
id
1357+
});
1358+
1359+
for (name, id) in interfaces {
1360+
let prev = self.resolve.packages[pkg].interfaces.insert(name, id);
1361+
assert!(prev.is_none());
1362+
self.resolve.interfaces[id].package = Some(pkg);
13211363
}
1322-
for (_, world) in self.resolve.packages[id].worlds.iter() {
1323-
self.resolve.worlds[*world].package = Some(id);
1324-
let world = &self.resolve.worlds[*world];
1364+
1365+
for (name, id) in worlds {
1366+
let prev = self.resolve.packages[pkg].worlds.insert(name, id);
1367+
assert!(prev.is_none());
1368+
let world = &mut self.resolve.worlds[id];
1369+
world.package = Some(pkg);
13251370
for (name, item) in world.imports.iter().chain(world.exports.iter()) {
13261371
if let WorldKey::Name(_) = name {
13271372
if let WorldItem::Interface(iface) = item {
1328-
self.resolve.interfaces[*iface].package = Some(id);
1373+
self.resolve.interfaces[*iface].package = Some(pkg);
13291374
}
13301375
}
13311376
}
13321377
}
1333-
id
1378+
1379+
pkg
13341380
}
13351381

13361382
fn visit_package(&self, idx: usize, order: &mut IndexSet<usize>) {

crates/wit-component/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ pub fn embed_component_metadata(
206206
world: WorldId,
207207
encoding: StringEncoding,
208208
) -> Result<()> {
209-
let encoded = metadata::encode(&wit_resolver, world, encoding, None)?;
209+
let encoded = metadata::encode(&wit_resolver, world, encoding, None, None)?;
210210

211211
let section = wasm_encoder::CustomSection {
212212
name: "component-type".into(),

0 commit comments

Comments
 (0)