Skip to content

Commit 3cd87ad

Browse files
Rename maybeDecompressedStringAsNumber and propagate mayContainCompressedIds to non-leaf nodes (#26980)
## Description Improves clarity around how compressed ids are handled in UniformChunks, and tracks if a chunk might use them (deeply) in the shape. As a memory optimization, avoid storing idCompressor when unneeded. For small leaf chunks (as is common currently) this should help memory use a little as there are a very large number of such chunks.
1 parent ed0efc3 commit 3cd87ad

File tree

4 files changed

+132
-25
lines changed

4 files changed

+132
-25
lines changed

packages/dds/tree/src/feature-libraries/chunked-forest/chunkTree.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -424,8 +424,8 @@ export interface ChunkCompressor {
424424
/**
425425
* If the idCompressor is provided, {@link UniformChunk}s with identifiers will be encoded for its in-memory representation.
426426
* @remarks
427-
* This compression applies to {@link UniformChunk}s when {@link TreeShape.maybeDecompressedStringAsNumber} is set.
428-
* If the `policy` does not use UniformChunks or does not set `maybeDecompressedStringAsNumber`, then no compression will be applied even when providing `idCompressor`.
427+
* This compression applies to {@link UniformChunk}s when {@link TreeShape.mayContainCompressedIds} is set.
428+
* If the `policy` does not use UniformChunks or does not set `mayContainCompressedIds`, then no compression will be applied even when providing `idCompressor`.
429429
*/
430430
readonly idCompressor: IIdCompressor | undefined;
431431
}
@@ -583,6 +583,7 @@ export function insertValues(
583583
// Slow path: walk shape and cursor together, inserting values.
584584
if (shape.hasValue) {
585585
if (
586+
shape.mayContainCompressedIds &&
586587
typeof cursor.value === "string" &&
587588
idCompressor !== undefined &&
588589
isStableNodeIdentifier(cursor.value)

packages/dds/tree/src/feature-libraries/chunked-forest/uniformChunk.ts

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
import { assert, compareArrays, oob, fail } from "@fluidframework/core-utils/internal";
77
import type { SessionSpaceCompressedId, IIdCompressor } from "@fluidframework/id-compressor";
8-
import { UsageError } from "@fluidframework/telemetry-utils/internal";
98

109
import {
1110
CursorLocationType,
@@ -54,7 +53,7 @@ export class UniformChunk extends ReferenceCountedBase implements TreeChunk {
5453
idCompressor?: IIdCompressor,
5554
) {
5655
super();
57-
this.idCompressor = idCompressor;
56+
this.idCompressor = shape.treeShape.mayContainCompressedIds ? idCompressor : undefined;
5857
assert(
5958
shape.treeShape.valuesPerTopLevelNode * shape.topLevelLength === values.length,
6059
0x4c3 /* invalid number of values for shape */,
@@ -100,30 +99,42 @@ export class TreeShape {
10099
// TODO: this is only needed at chunk roots. Optimize it base on that.
101100
public readonly positions: readonly NodePositionInfo[];
102101

102+
/**
103+
* Whether chunks using this shape (including any descendant leaf within it) may contain values compressed by the {@link UniformChunk.idCompressor}.
104+
*
105+
* @remarks
106+
* For string leaf nodes, this can be explicitly set to `true` to indicate that the value may be a compressed id
107+
* stored as a number that needs to be decompressed back to a string.
108+
* For non-leaf nodes, this is automatically derived from whether any child shapes have it set.
109+
*/
110+
public readonly mayContainCompressedIds: boolean;
111+
103112
/**
104113
*
105114
* @param type - {@link TreeNodeSchemaIdentifier} used to compare shapes.
106115
* @param hasValue - whether or not the TreeShape has a value.
107116
* @param fieldsArray - an array of {@link FieldShape} values, which contains a TreeShape for each FieldKey.
108117
*
109-
* @param maybeDecompressedStringAsNumber - used to check whether or not the value could have been compressed by the idCompressor.
110-
* This flag can only be set on string leaf nodes, and will throw a usage error otherwise.
111-
* If set to true, an additional check can be made (example: getting the value of {@link Cursor}) to return the original uncompressed value.
118+
* @param maybeCompressedIdLeaf - whether the value may have been compressed by the {@link UniformChunk.idCompressor}.
119+
* Can only be explicitly set to `true` on string leaf nodes; otherwise this constructor asserts.
120+
* For non-leaf nodes, {@link TreeShape.mayContainCompressedIds} is automatically derived from child shapes.
112121
*/
113122
public constructor(
114123
public readonly type: TreeNodeSchemaIdentifier,
115124
public readonly hasValue: boolean,
116125
public readonly fieldsArray: readonly FieldShape[],
117-
public readonly maybeDecompressedStringAsNumber: boolean = false,
126+
maybeCompressedIdLeaf: boolean = false,
118127
) {
119-
if (
120-
maybeDecompressedStringAsNumber &&
121-
!(hasValue && type === "com.fluidframework.leaf.string")
122-
) {
123-
throw new UsageError(
124-
"maybeDecompressedStringAsNumber flag can only be set to true for string leaf node.",
128+
assert(hasValue === false || fieldsArray.length === 0, "only non-leaf can have fields");
129+
if (maybeCompressedIdLeaf) {
130+
assert(
131+
hasValue && type === "com.fluidframework.leaf.string",
132+
"only strings can opt into maybeCompressedIdLeaf",
125133
);
126134
}
135+
// For non-leaf nodes, derive from whether any child shapes contain compressed ids.
136+
this.mayContainCompressedIds =
137+
maybeCompressedIdLeaf || fieldsArray.some(([, shape]) => shape.mayContainCompressedIds);
127138
const fields: Map<FieldKey, OffsetShape> = new Map();
128139
let numberOfValues = hasValue ? 1 : 0;
129140
const infos: NodePositionInfo[] = [
@@ -146,7 +157,7 @@ export class TreeShape {
146157
}
147158

148159
public equals(other: TreeShape): boolean {
149-
// TODO: either dedup instances and/or store a collision resistant hash for fast compare.
160+
// TODO: either dedupe instances and/or store a collision resistant hash for fast compare.
150161

151162
if (
152163
!compareArrays(
@@ -157,7 +168,11 @@ export class TreeShape {
157168
) {
158169
return false;
159170
}
160-
return this.type === other.type && this.hasValue === other.hasValue;
171+
return (
172+
this.type === other.type &&
173+
this.hasValue === other.hasValue &&
174+
this.mayContainCompressedIds === other.mayContainCompressedIds
175+
);
161176
}
162177

163178
public withTopLevelLength(topLevelLength: number): ChunkShape {
@@ -551,9 +566,8 @@ class Cursor extends SynchronousCursor implements ChunkedCursor {
551566
const info = this.nodeInfo(CursorLocationType.Nodes);
552567
if (info.shape.hasValue) {
553568
const value = this.chunk.values[info.valueOffset];
554-
// If the maybeDecompressedStringAsNumber flag is set to true, we check if the value is a number.
555-
// This flag can only ever be set on string leaf nodes, so if the value is a number, we can assume it is a compressible, known stable id.
556-
if (info.shape.maybeDecompressedStringAsNumber && typeof value === "number") {
569+
// If mayContainCompressedIds is set, check if the value is a number (i.e. a compressed ID that needs decompression).
570+
if (info.shape.mayContainCompressedIds && typeof value === "number") {
557571
const idCompressor = this.chunk.idCompressor;
558572
assert(
559573
idCompressor !== undefined,

packages/dds/tree/src/test/feature-libraries/chunked-forest/chunkTree.spec.ts

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,52 @@ describe("chunkTree", () => {
104104
assert.deepEqual(values, chunk.values);
105105
});
106106
}
107+
108+
it("does not compress string values when shape does not have mayContainCompressedIds", () => {
109+
// Simulate a string leaf whose shape does not opt into compressed ids
110+
// (e.g. a plain string field in a string | number union).
111+
// Even if the string value is a valid stable id and an idCompressor is provided,
112+
// insertValues must leave it as a string.
113+
const stableId = testIdCompressor.decompress(testIdCompressor.generateCompressedId());
114+
const stringShapeNoCompress = new TreeShape(
115+
brand(stringSchema.identifier),
116+
true,
117+
[],
118+
false,
119+
);
120+
121+
const values: Value[] = [];
122+
const cursor = cursorForJsonableTreeNode({
123+
type: brand(stringSchema.identifier),
124+
value: stableId,
125+
});
126+
insertValues(cursor, stringShapeNoCompress, values, testIdCompressor);
127+
// The value must remain the original string, not a compressed numeric id.
128+
assert.equal(values.length, 1);
129+
assert.equal(typeof values[0], "string");
130+
assert.equal(values[0], stableId);
131+
});
132+
133+
it("compresses string values when shape has mayContainCompressedIds", () => {
134+
const compressedId = testIdCompressor.generateCompressedId();
135+
const stableId = testIdCompressor.decompress(compressedId);
136+
const stringShapeCompress = new TreeShape(
137+
brand(stringSchema.identifier),
138+
true,
139+
[],
140+
true,
141+
);
142+
143+
const values: Value[] = [];
144+
const cursor = cursorForJsonableTreeNode({
145+
type: brand(stringSchema.identifier),
146+
value: stableId,
147+
});
148+
insertValues(cursor, stringShapeCompress, values, testIdCompressor);
149+
// The value must be compressed to the numeric id.
150+
assert.equal(values.length, 1);
151+
assert.equal(values[0], compressedId);
152+
});
107153
});
108154

109155
describe("uniformChunkFromCursor", () => {

packages/dds/tree/src/test/feature-libraries/chunked-forest/uniformChunk.spec.ts

Lines changed: 52 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import { strict as assert } from "node:assert";
77

88
import { BenchmarkType, benchmark } from "@fluid-tools/benchmark";
9-
import { validateUsageError } from "@fluidframework/test-runtime-utils/internal";
9+
import { validateAssertionError } from "@fluidframework/test-runtime-utils/internal";
1010

1111
import { EmptyKey, type ITreeCursorSynchronous } from "../../../core/index.js";
1212
import {
@@ -66,16 +66,62 @@ describe("uniformChunk", () => {
6666
validateShape(tree.dataFactory().shape);
6767
});
6868
}
69-
it("shape with maybeCompressedStringAsNumber flag set to true fails if it is not a string leaf node.", () => {
69+
it("shape with mayContainCompressedIds flag set to true fails if it is not a string leaf node.", () => {
7070
const validShapeWithFlag = new TreeShape(brand(stringSchema.identifier), true, [], true);
71-
// Test that a non string leaf node shape with maybeCompressedStringAsNumber set to true fails.
71+
// Test that a non string leaf node shape with mayContainCompressedIds set to true fails.
7272
assert.throws(
7373
() => new TreeShape(brand(numberSchema.identifier), true, [], true),
74-
validateUsageError(
75-
/maybeDecompressedStringAsNumber flag can only be set to true for string leaf node./,
76-
),
74+
validateAssertionError("only strings can opt into maybeCompressedIdLeaf"),
7775
);
7876
});
77+
78+
it("equals distinguishes shapes differing only by mayContainCompressedIds", () => {
79+
const withIds = new TreeShape(brand(stringSchema.identifier), true, [], true);
80+
const withoutIds = new TreeShape(brand(stringSchema.identifier), true, [], false);
81+
assert.equal(withIds.equals(withoutIds), false);
82+
assert.equal(withoutIds.equals(withIds), false);
83+
// Self-equality still holds
84+
assert.equal(withIds.equals(withIds), true);
85+
assert.equal(withoutIds.equals(withoutIds), true);
86+
});
87+
88+
it("mayContainCompressedIds propagates from child shapes to parent shapes", () => {
89+
const leafWithIds = new TreeShape(brand(stringSchema.identifier), true, [], true);
90+
const leafWithoutIds = new TreeShape(brand(stringSchema.identifier), true, [], false);
91+
92+
// Parent with a child that has `mayContainCompressedIds` should also have it.
93+
const parentWithCompressedChild = new TreeShape(
94+
brand(JsonAsTree.JsonObject.identifier),
95+
false,
96+
[[xField, leafWithIds, 1]],
97+
);
98+
assert.equal(parentWithCompressedChild.mayContainCompressedIds, true);
99+
100+
// Parent with no children that have `mayContainCompressedIds` should not have it.
101+
const parentWithoutCompressedChild = new TreeShape(
102+
brand(JsonAsTree.JsonObject.identifier),
103+
false,
104+
[[xField, leafWithoutIds, 1]],
105+
);
106+
assert.equal(parentWithoutCompressedChild.mayContainCompressedIds, false);
107+
108+
// Propagation through multiple levels: grandparent should inherit from grandchild.
109+
const grandparent = new TreeShape(brand(JsonAsTree.Array.identifier), false, [
110+
[EmptyKey, parentWithCompressedChild, 1],
111+
]);
112+
assert.equal(grandparent.mayContainCompressedIds, true);
113+
114+
// Mixed children: one with and one without compressed IDs.
115+
const parentWithMixedChildren = new TreeShape(
116+
brand(JsonAsTree.JsonObject.identifier),
117+
false,
118+
[
119+
[xField, leafWithoutIds, 1],
120+
[yField, leafWithIds, 1],
121+
],
122+
);
123+
assert.equal(parentWithMixedChildren.mayContainCompressedIds, true);
124+
});
79125
});
80126

81127
testSpecializedFieldCursor<TreeChunk, ITreeCursorSynchronous>({

0 commit comments

Comments
 (0)