Skip to content

Commit a32204f

Browse files
committed
Refactor CldrPluralRules
* Introduce class `CldrPluralRule` containing the `PluralRuleDelegate` and a list of possible `PluralCategory` values
* Optimize `GetOperands` for zero heap allocations
1 parent 994182a commit a32204f

5 files changed

Lines changed: 570 additions & 403 deletions

File tree

src/SmartFormat.Tests/Utilities/CldrPluralRuleGenerator.cs

Lines changed: 132 additions & 74 deletions
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ namespace SmartFormat.Tests.Utilities;
1818
[ExcludeFromCodeCoverage]
1919
internal static class CldrPluralRuleGenerator
2020
{
21-
#region JSON Deserialization DTOs
21+
#region *** JSON Deserialization DTOs ***
2222

2323
private class CldrRoot
2424
{
@@ -45,7 +45,7 @@ private class CldrSupplemental
4545

4646
#endregion
4747

48-
#region Rule Processing Classes
48+
#region *** Rule Processing Classes ***
4949

5050
private class LanguageRuleSet
5151
{
@@ -88,33 +88,33 @@ public Rule(string category, string originalRule)
8888
#endregion
8989

9090
// Download the CLDR JSON data file from: https://github.com/unicode-org/cldr-json
91-
public static void Generate(string inputFile, string outputFile)
91+
internal static void Generate(string inputFile, string outputFile)
9292
{
9393
if (!File.Exists(inputFile))
9494
{
95-
Console.WriteLine($"Error: Input file not found: {Path.GetFullPath(inputFile)}");
95+
Console.WriteLine($@"Error: Input file not found: {Path.GetFullPath(inputFile)}");
9696
return;
9797
}
9898

99-
Console.WriteLine($"Reading CLDR plural rules from '{inputFile}'...");
99+
Console.WriteLine($@"Reading CLDR plural rules from '{inputFile}'...");
100100
var jsonContent = File.ReadAllText(inputFile);
101101
var cldrData = JsonSerializer.Deserialize<CldrRoot>(jsonContent);
102102

103103
if (cldrData?.Supplemental.PluralsTypeCardinal == null)
104104
{
105-
Console.WriteLine("Error: Failed to parse 'plurals-type-cardinal' data from JSON.");
105+
Console.WriteLine(@"Error: Failed to parse 'plurals-type-cardinal' data from JSON.");
106106
return;
107107
}
108108

109-
Console.WriteLine("Processing rules for each language...");
109+
Console.WriteLine(@"Processing rules for each language...");
110110
var langToRuleSet = cldrData.Supplemental.PluralsTypeCardinal
111111
.ToDictionary(kvp => kvp.Key, kvp => new LanguageRuleSet(kvp.Value));
112112

113113
var rulesToLangs = langToRuleSet
114114
.GroupBy(kvp => kvp.Value.CanonicalKey)
115115
.ToDictionary(g => g.Key, g => g.Select(kvp => kvp.Key).ToList());
116116

117-
Console.WriteLine($"Generating '{outputFile}'...");
117+
Console.WriteLine($@"Generating '{outputFile}'...");
118118
var sb = new StringBuilder();
119119

120120
// Add header
@@ -129,9 +129,11 @@ public static void Generate(string inputFile, string outputFile)
129129

130130
File.WriteAllText(outputFile, sb.ToString(), Encoding.UTF8);
131131
Console.WriteLine(
132-
$"Successfully generated '{outputFile}'. Found {rulesToLangs.Count} unique pluralization rule sets across {langToRuleSet.Count} languages.");
132+
$@"Successfully generated '{outputFile}'. Found {rulesToLangs.Count} unique pluralization rule sets across {langToRuleSet.Count} languages.");
133133
}
134134

135+
#region *** Rule Translation and Code Generation ***
136+
135137
internal static readonly char[] CommaSeparator = { ',' };
136138

137139
internal static readonly string[] OrSeparator = { " or " };
@@ -236,7 +238,7 @@ private static void GenerateCSharpCode(StringBuilder sb, Dictionary<string, List
236238

237239
sb.AppendLine("using System;");
238240
sb.AppendLine("using System.Collections.Generic;");
239-
sb.AppendLine("using System.Globalization;");
241+
sb.AppendLine("using System.Linq;");
240242
sb.AppendLine();
241243
sb.AppendLine("namespace SmartFormat.Utilities;");
242244
sb.AppendLine();
@@ -253,6 +255,25 @@ private static void GenerateCSharpCode(StringBuilder sb, Dictionary<string, List
253255
sb.AppendLine(" Other");
254256
sb.AppendLine("}");
255257
sb.AppendLine();
258+
sb.AppendLine("""
259+
public sealed class CldrPluralRule
260+
{
261+
public IReadOnlyList<PluralCategory> PossibleCategories { get; }
262+
public CldrPluralRules.PluralRuleDelegate Delegate { get; }
263+
264+
public CldrPluralRule(IEnumerable<PluralCategory> possibleCategories, CldrPluralRules.PluralRuleDelegate ruleDelegate)
265+
{
266+
PossibleCategories = possibleCategories.ToArray();
267+
Delegate = ruleDelegate;
268+
}
269+
270+
public PluralCategory GetCategory(decimal value)
271+
{
272+
return Delegate(value);
273+
}
274+
}
275+
276+
""");
256277
sb.Append("""
257278
/// <summary>
258279
/// Provides functionality for determining plural categories based
@@ -274,7 +295,7 @@ public static class CldrPluralRules
274295
sb.AppendLine(
275296
" // For languages with a single form (e.g., Japanese, Chinese, Vietnamese), the category is 'other'.");
276297
sb.AppendLine(
277-
" private static PluralCategory Singular(decimal value, int pluralWordsCount) => PluralCategory.Other;");
298+
" private static PluralCategory Singular(decimal value) => PluralCategory.Other;");
278299
sb.AppendLine();
279300
foreach (var (key, langs) in rulesToLangs.OrderBy(kvp => kvp.Value.First()))
280301
{
@@ -284,7 +305,7 @@ public static class CldrPluralRules
284305
var ruleSet = langToRuleSet[langs.First()];
285306
var titleCase = CultureInfo.InvariantCulture.TextInfo;
286307
sb.AppendLine($" // Rule for: {string.Join(", ", langs.Take(5))}{(langs.Count > 5 ? ", ..." : "")}");
287-
sb.AppendLine($" private static PluralCategory {methodName}(decimal value, int pluralWordsCount)");
308+
sb.AppendLine($" private static PluralCategory {methodName}(decimal value)");
288309
sb.AppendLine(" {");
289310
sb.AppendLine(" var (n, i, v, w, f, t, e) = GetOperands(value);");
290311

@@ -303,8 +324,9 @@ public static class CldrPluralRules
303324
sb.AppendLine(" #endregion");
304325
sb.AppendLine();
305326

327+
sb.AppendLine(" #region Language-to-Rule Mapping");
306328
sb.AppendLine(
307-
" private static readonly Dictionary<string, PluralRuleDelegate> DefaultLangToDelegate = new()");
329+
" private static readonly Dictionary<string, CldrPluralRule> DefaultLangToRule= new()");
308330
sb.AppendLine(" {");
309331

310332
foreach (var lang in rulesToLangs.SelectMany(kvp => kvp.Value).OrderBy(l => l))
@@ -313,81 +335,117 @@ public static class CldrPluralRules
313335
var methodName = ruleMethodNames[ruleSet.CanonicalKey];
314336
var categories = ruleSet.Rules.Select(r => r.Category).ToList();
315337
categories.Add("other");
316-
var comment = string.Join(", ", categories);
317-
sb.AppendLine($" {{ \"{lang}\", {methodName} }}, // {comment}");
338+
var categoryArrayString = $"new[] {{ {string.Join(", ", categories.Select(c => $"PluralCategory.{CultureInfo.InvariantCulture.TextInfo.ToTitleCase(c)}"))} }}";
339+
sb.AppendLine($" {{ \"{lang}\", new CldrPluralRule({categoryArrayString}, {methodName}) }},");
318340
}
319341

320342
sb.AppendLine(" };");
321-
sb.AppendLine();
322-
343+
sb.AppendLine(" #endregion");
323344
sb.AppendLine(
324-
@" public static Dictionary<string, PluralRuleDelegate> IsoLangToDelegate { get; private set; } = new(DefaultLangToDelegate);
345+
"""
346+
347+
#region CLDR Helper methods
348+
349+
/// <summary>
350+
/// n => The absolute value of the input number (e.g., 1, 2.5, 0.01)
351+
/// u => The integer digits of n (deprecated in CLDR, but may appear in legacy rules)
352+
/// v => Number of visible fraction digits in n (e.g., 1.5 => v = 1)
353+
/// w => Number of visible fraction digits excluding trailing zeros (e.g., 1.50 => w = 1)
354+
/// f => Visible fraction digits as an integer (e.g., 1.25 => f = 25)
355+
/// t => Visible fraction digits excluding trailing zeros (e.g., 1.50 => t = 5)
356+
/// e => Always 0 for decimal input
357+
/// </summary>
358+
private static (decimal n, long i, int v, int w, long f, long t, int e) GetOperands(decimal value)
359+
{
360+
// Get absolute value for CLDR rules
361+
var n = Math.Abs(value);
362+
363+
// GetBits() returns an array of four integers.
364+
// Extract the binary representation of the decimal:
365+
// decimals are stored as 96-bit integer with a scaling factor.
366+
// - bits[0], bits[1], bits[2] form the 96-bit integer value
367+
// - bits[3] contains the scale (bits 16-23) and sign (bit 31)
368+
var bits = decimal.GetBits(n);
369+
370+
// Integer part (before decimal point)
371+
var i = (long) Math.Truncate(n);
372+
373+
// v = number of visible fractional digits (including trailing zeros)
374+
// w = number of significant fractional digits (excluding trailing zeros)
375+
// f = fractional digits as integer (with trailing zeros)
376+
// t = fractional digits as integer (without trailing zeros)
377+
// e = exponent (used for scientific notation)
378+
// Only numbers like 1.0e6 should have e != 0
379+
// Since we're parsing from regular decimal, e is always 0
380+
int v = 0, w = 0, e = 0;
381+
long f = 0, t = 0;
382+
383+
// Extract scale factor - to see how many fractional digits exist
384+
// Move scale to the lowest 8 bits and filter lower 5 bits (0-28)
385+
var scale = (bits[3] >> 16) & 0x1F;
386+
387+
if (scale > 0)
388+
{
389+
// Total visible fractional digits (v) equals the scale
390+
v = scale;
391+
392+
// Calculate fractional part mathematically
393+
var fractionalPart = n - i;
394+
f = (long) (fractionalPart * Pow10(scale));
395+
396+
// Remove trailing zeros for significant fractional digits (w)
397+
var temp = f;
398+
w = scale;
399+
while (temp % 10 == 0 && temp > 0)
400+
{
401+
temp /= 10;
402+
w--;
403+
}
404+
t = temp;
405+
}
325406
326-
public static void RestoreDefault() => IsoLangToDelegate = new Dictionary<string, PluralRuleDelegate>(DefaultLangToDelegate);
407+
return (n, i, v, w, f, t, e);
408+
}
327409
328-
public delegate PluralCategory PluralRuleDelegate(decimal value, int pluralWordsCount);
329-
330-
public static PluralRuleDelegate GetPluralRule(string? twoLetterIsoLanguageName)
331-
{
332-
if (twoLetterIsoLanguageName != null && IsoLangToDelegate.TryGetValue(twoLetterIsoLanguageName, out var rule))
333-
return rule;
334-
335-
return IsoLangToDelegate[""und""];
336-
}
410+
private static decimal Pow10(int exponent) => (decimal) Math.Pow(10, exponent);
337411
338-
#region CLDR Helper methods
339-
340-
/// <summary>
341-
/// n => The absolute value of the input number (e.g., 1, 2.5, 0.01)
342-
/// u => The integer digits of n (deprecated in CLDR, but may appear in legacy rules)
343-
/// v => Number of visible fraction digits in n (e.g., 1.5 => v = 1)
344-
/// w => Number of visible fraction digits excluding trailing zeros (e.g., 1.50 => w = 1)
345-
/// f => Visible fraction digits as an integer (e.g., 1.25 => f = 25)
346-
/// t => Visible fraction digits excluding trailing zeros (e.g., 1.50 => t = 5)
347-
/// </summary>
348-
private static (decimal n, long i, int v, int w, long f, long t, int e) GetOperands(decimal value)
349-
{
350-
var n = Math.Abs(value);
351-
var s = value.ToString(CultureInfo.InvariantCulture);
352-
long i = (long)Math.Truncate(n);
353-
int v = 0, w = 0, e = 0;
354-
long f = 0, t = 0;
355-
356-
var decPoint = s.IndexOf('.');
357-
if (decPoint != -1)
358-
{
359-
var fractionStr = s.Substring(decPoint + 1);
360-
v = fractionStr.Length;
361-
long.TryParse(fractionStr, NumberStyles.Integer, CultureInfo.InvariantCulture, out f);
412+
private static bool IsInRange(long val, long min, long max) => val >= min && val <= max;
413+
414+
private static bool IsInRange(decimal val, decimal min, decimal max)
415+
{
416+
// First check if value is within the range
417+
if (val < min || val > max)
418+
return false;
362419
363-
var fractionStrTrimmed = fractionStr.TrimEnd('0');
364-
w = fractionStrTrimmed.Length;
365-
long.TryParse(fractionStrTrimmed, NumberStyles.Integer, CultureInfo.InvariantCulture, out t);
366-
}
420+
// If min and max are integers (whole numbers), then require val to also be integer
421+
// This for rules like 'n = 2..4' which should not match '2.5'
422+
if (min == Math.Truncate(min) && max == Math.Truncate(max))
423+
{
424+
return val == Math.Truncate(val);
425+
}
367426
368-
return (n, i, v, w, f, t, e);
369-
}
427+
return true;
428+
}
370429
371-
private static bool IsInRange(long val, long min, long max) => val >= min && val <= max;
372-
373-
private static bool IsInRange(decimal val, decimal min, decimal max)
374-
{
375-
// First check if value is within the range
376-
if (val < min || val > max)
377-
return false;
430+
#endregion
378431
379-
// If min and max are integers (whole numbers), then require val to also be integer
380-
// This for rules like 'n = 2..4' which should not match '2.5'
381-
if (min == Math.Truncate(min) && max == Math.Truncate(max))
382-
{
383-
return val == Math.Truncate(val);
384-
}
432+
public static Dictionary<string, CldrPluralRule> IsoCodeToRule { get; private set; } = new(DefaultLangToRule);
433+
434+
public static void RestoreDefault() => IsoCodeToRule = new Dictionary<string, CldrPluralRule>(DefaultLangToRule);
385435
386-
return true;
436+
internal delegate PluralCategory PluralRuleDelegate(decimal value);
437+
438+
public static CldrPluralRule GetPluralRule(string? twoLetterIsoCode)
439+
{
440+
if (twoLetterIsoCode != null && IsoCodeToRule.TryGetValue(twoLetterIsoCode, out var rule))
441+
return rule;
442+
443+
return IsoCodeToRule["und"];
444+
}
445+
}
446+
""");
387447
}
388448

389449
#endregion
390-
}");
391-
}
392450
}
393451
#endif

0 commit comments

Comments
 (0)