All public declarations live in include/rakam.h. Link against
build/librakam.a and ../decoder/build/libunicode.a.
#include <rakam.h>

Buffers: the library never allocates. Callers provide fixed-size stack structs. Relevant constants:
| Constant | Value | Purpose |
|---|---|---|
| RAKAM_MAX_DIGITS | 17 | ITU upper bound for the national number |
| RAKAM_MAX_EXTENSION | 8 | Extension digits |
| RAKAM_MAX_FORMATTED | 48 | Any single formatter's output |
| RAKAM_MAX_RAW | 64 | rakam_parse_and_keep_raw scratch |
| RAKAM_REGION_LEN | 2 | ISO 3166-1 alpha-2 |

RAKAM_OK /* parse / format succeeded */
RAKAM_ERR_EMPTY /* input had no digits */
RAKAM_ERR_INVALID_UTF8 /* non-UTF-8 bytes in input */
RAKAM_ERR_TOO_SHORT /* shorter than region's min */
RAKAM_ERR_TOO_LONG /* longer than region's max */
RAKAM_ERR_UNKNOWN_COUNTRY /* no region supplied / +cc not routable */
RAKAM_ERR_INVALID_NUMBER /* never fits any format / null args */
RAKAM_ERR_BUFFER_TOO_SMALL /* formatter output overflow */
RAKAM_ERR_NOT_INITIALIZED /* called before rakam_init */

Pretty-print via const char *rakam_status_string(rakam_status_t).
RAKAM_TYPE_UNKNOWN = 0
RAKAM_TYPE_FIXED_LINE = 1
RAKAM_TYPE_MOBILE = 2
RAKAM_TYPE_FIXED_OR_MOBILE = 3 /* both patterns identical */
RAKAM_TYPE_TOLL_FREE = 4
RAKAM_TYPE_PREMIUM_RATE = 5
RAKAM_TYPE_SHARED_COST = 6
RAKAM_TYPE_VOIP = 7
RAKAM_TYPE_PERSONAL = 8
RAKAM_TYPE_PAGER = 9
RAKAM_TYPE_UAN = 10
RAKAM_TYPE_VOICEMAIL = 11

RAKAM_FMT_E164 /* +<cc><nsn> */
RAKAM_FMT_INTERNATIONAL /* +<cc> <grouped nsn> [ext. N] */
RAKAM_FMT_NATIONAL /* grouped nsn with national prefix [ext. N] */
RAKAM_FMT_RFC3966 /* tel:+<cc>-<grouped>[;ext=N] */

RAKAM_MATCH_NOT_A_NUMBER = -1
RAKAM_MATCH_NO_MATCH = 0
RAKAM_MATCH_SHORT_NSN = 1 /* one is a trailing substring */
RAKAM_MATCH_NSN = 2 /* NSN equal, maybe different cc/ext */
RAKAM_MATCH_EXACT = 3 /* cc + nsn + ext all equal */

typedef struct {
uint16_t country_code; /* 1 / 44 / 90 / … */
char region[3]; /* "US" / "TR" / "" */
uint8_t national_len;
char national[RAKAM_MAX_DIGITS + 1]; /* ASCII digits */
uint8_t extension_len;
char extension[RAKAM_MAX_EXTENSION + 1];
rakam_type_t type;
bool is_valid;
bool is_possible;
bool has_plus;
bool italian_leading_zero;
uint8_t number_of_leading_zeros;
uint8_t raw_input_len;
char raw_input[RAKAM_MAX_RAW + 1]; /* set only by *_and_keep_raw */
} rakam_number_t;

void rakam_init(void);
void rakam_cleanup(void);
const char *rakam_version(void);

rakam_init builds the SIMD compaction LUT and initialises the
decoder library. Idempotent and thread-safe via pthread_once.
Call before any other entry point.

rakam_status_t rakam_parse(const char *input, size_t input_len,
const char *default_region,
rakam_number_t *out);

default_region is the ISO alpha-2 code (e.g. "TR", "US"). Pass
NULL if you only accept international-form input (+<cc>…).

rakam_status_t rakam_parse_and_keep_raw(const char *input, size_t input_len,
const char *default_region,
rakam_number_t *out);

Identical to rakam_parse, but also copies up to RAKAM_MAX_RAW bytes
of the raw input into out->raw_input for later recall by
rakam_format_in_original.

Example
rakam_number_t n;
switch (rakam_parse("+1 (415) 555-2671 ext. 42", 25, NULL, &n)) {
case RAKAM_OK: /* n.country_code=1, n.region="US",
n.national="4155552671", n.extension="42" */
default: /* see n.is_possible */
}

rakam_status_t rakam_format(const rakam_number_t *n,
rakam_format_t style,
char *out, size_t cap, size_t *out_len);

Dispatches to one of four internal formatters:

| style | example output |
|---|---|
| RAKAM_FMT_E164 | +14155552671 |
| RAKAM_FMT_INTERNATIONAL | +1 415-555-2671 |
| RAKAM_FMT_NATIONAL | (415) 555-2671 |
| RAKAM_FMT_RFC3966 | tel:+1-415-555-2671 |

Extensions, when present on n, are appended:
- NATIONAL / INTERNATIONAL: " ext. <digits>"
- RFC3966: ";ext=<digits>"
- E164: omitted (ITU rule)

rakam_status_t rakam_format_out_of_country(const rakam_number_t *n,
const char *calling_from_region,
char *out, size_t cap,
size_t *out_len);

Prepends the caller's IDD prefix. When the calling region is unknown,
falls back to INTERNATIONAL; when it is the number's own region, falls
back to NATIONAL.
+90 5321234567 called from US: "011 90 532 123 45 67"
+1 4155552671 called from DE: "00 1 415-555-2671"
+90 5321234567 called from TR: "0532 123 45 67" (domestic)
rakam_status_t rakam_format_with_carrier_code(const rakam_number_t *n,
const char *carrier_code,
char *out, size_t cap,
size_t *out_len);

Produces <national_prefix> <carrier_code> <intl-style-grouping> —
the shape used in Brazil / Argentina.

rakam_status_t rakam_format_in_original(const rakam_number_t *n,
char *out, size_t cap,
size_t *out_len);

Returns n->raw_input verbatim when populated (by
rakam_parse_and_keep_raw), otherwise falls back to NATIONAL
(or INTERNATIONAL when the original had a +).

bool rakam_is_valid(const rakam_number_t *n); /* n->is_valid */
bool rakam_is_possible(const rakam_number_t *n); /* n->is_possible */
bool rakam_is_valid_for_region(const rakam_number_t *n,
const char *region);

rakam_is_valid_for_region adds a region-match gate on top of is_valid:
it returns true only when n->region equals the supplied region AND
n->is_valid is true.
n->type carries the matched per-type rule (FIXED_LINE / MOBILE /
TOLL_FREE / …). NANPA and similar regions where fixed and mobile
patterns coincide yield FIXED_OR_MOBILE.
uint16_t rakam_country_code_for_region(const char *region);
const char *rakam_main_region_for_country_code(uint16_t cc);
size_t rakam_supported_regions_count(void);
typedef void (*rakam_region_cb)(const char *region, uint16_t cc, void *ctx);
void rakam_iterate_regions(rakam_region_cb cb, void *ctx);
uint8_t rakam_length_of_national_destination_code(const rakam_number_t *n);
rakam_match_t rakam_match(const rakam_number_t *a,
const rakam_number_t *b);

rakam_status_t rakam_get_example_number(const char *region,
rakam_type_t type,
rakam_number_t *out);

Pass RAKAM_TYPE_UNKNOWN to get any example for the region;
pass RAKAM_TYPE_FIXED_OR_MOBILE to accept either a fixed-line or a mobile example.
Returns a fully populated rakam_number_t — example numbers have
is_valid = true, is_possible = true, has_plus = true.
rakam_number_t ex;
rakam_get_example_number("TR", RAKAM_TYPE_MOBILE, &ex);
/* ex.country_code=90, ex.national="5012345678" */

bool rakam_is_valid_short_number(const char *digits, size_t len,
const char *region);
bool rakam_is_emergency_number(const char *digits, size_t len,
const char *region);

Both expect ASCII digit strings (no +, no punctuation) and return
false for unknown regions, empty input, or any non-digit byte.

rakam_is_emergency_number("911", 3, "US"); /* true */
rakam_is_emergency_number("411", 3, "US"); /* false (valid short, not emergency) */
rakam_is_valid_short_number("411", 3, "US"); /* true */
rakam_is_emergency_number("112", 3, "TR"); /* true */
rakam_is_emergency_number("110", 3, "DE"); /* true */

Progressive formatting as the user types. The formatter state is a plain struct that lives on the caller's stack.

rakam_aytf_t f;
rakam_aytf_init(&f, "US");
rakam_aytf_input(&f, '4'); /* "4" */
rakam_aytf_input(&f, '1'); /* "41" */
rakam_aytf_input(&f, '5'); /* "415" */
rakam_aytf_input(&f, '5'); /* "(415) 5" */
/* ... */
rakam_aytf_input(&f, '1'); /* "(415) 555-2671" */
rakam_aytf_current(&f); /* last display */
rakam_aytf_clear(&f); /* reset; region stays */

- A leading + switches the formatter to international echo mode.
- A first keystroke equal to the region's national_prefix (e.g. 0 for TR, 1 for US) is stripped once and remembered; subsequent output folds it back in via the $NP template token.
- Punctuation typed by the user is ignored (does not reset state).
- No cursor tracking — the host UI framework handles that.

Free-text extraction.
typedef struct {
size_t start;
size_t length;
rakam_number_t number;
} rakam_match_result_t;
typedef void (*rakam_find_cb)(const rakam_match_result_t *m, void *ctx);
size_t rakam_find_numbers(const char *text, size_t text_len,
const char *default_region,
rakam_find_cb cb, void *ctx);

start / length index into the original text bytes, already
trimmed to a digit / + boundary on both sides. The callback fires once
per successful parse, in the order matches appear.

static void on_match(const rakam_match_result_t *m, void *ctx) {
char buf[RAKAM_MAX_FORMATTED]; size_t n;
rakam_format(&m->number, RAKAM_FMT_E164, buf, sizeof(buf), &n);
printf(" @%-3zu len=%zu -> %s\n", m->start, m->length, buf);
}
rakam_find_numbers("Call +14155552671 or +442079460958 today.",
41, NULL, on_match, NULL);

V1 supports POSSIBLE leniency only: no letter (vanity) characters inside
matches, and no STRICT_GROUPING / EXACT_GROUPING tiers.

/* lifecycle */
void rakam_init(void);
void rakam_cleanup(void);
const char *rakam_version(void);
const char *rakam_status_string(rakam_status_t);
/* parsing */
rakam_status_t rakam_parse(const char *, size_t, const char *, rakam_number_t *);
rakam_status_t rakam_parse_and_keep_raw(const char *, size_t, const char *,
rakam_number_t *);
/* formatting */
rakam_status_t rakam_format(const rakam_number_t *, rakam_format_t,
char *, size_t, size_t *);
rakam_status_t rakam_format_out_of_country(const rakam_number_t *, const char *,
char *, size_t, size_t *);
rakam_status_t rakam_format_with_carrier_code(const rakam_number_t *, const char *,
char *, size_t, size_t *);
rakam_status_t rakam_format_in_original(const rakam_number_t *,
char *, size_t, size_t *);
/* validation */
bool rakam_is_valid(const rakam_number_t *);
bool rakam_is_possible(const rakam_number_t *);
bool rakam_is_valid_for_region(const rakam_number_t *, const char *);
/* metadata */
rakam_status_t rakam_get_example_number(const char *, rakam_type_t,
rakam_number_t *);
uint16_t rakam_country_code_for_region(const char *);
const char *rakam_main_region_for_country_code(uint16_t);
size_t rakam_supported_regions_count(void);
void rakam_iterate_regions(rakam_region_cb, void *);
uint8_t rakam_length_of_national_destination_code(const rakam_number_t *);
rakam_match_t rakam_match(const rakam_number_t *, const rakam_number_t *);
/* short numbers */
bool rakam_is_valid_short_number(const char *, size_t, const char *);
bool rakam_is_emergency_number (const char *, size_t, const char *);
/* AsYouTypeFormatter */
void rakam_aytf_init (rakam_aytf_t *, const char *);
void rakam_aytf_clear(rakam_aytf_t *);
const char *rakam_aytf_input(rakam_aytf_t *, char);
const char *rakam_aytf_current(const rakam_aytf_t *);
/* PhoneNumberMatcher */
size_t rakam_find_numbers(const char *, size_t, const char *,
rakam_find_cb, void *);

- Every function returning rakam_status_t returns RAKAM_OK on success.
- Formatters write a NUL terminator within cap; on RAKAM_ERR_BUFFER_TOO_SMALL the output is undefined.
- out_len receives the written length (without the NUL) on success.
- rakam_parse zeroes *out on entry, so reading fields of a failed parse is safe but gives country_code = 0, national = "", etc.
- Short-number queries and AsYouType functions tolerate NULL region / digits and return false / empty string respectively.

Default compiler flags: -std=c11 -O3 -Wall -Wextra -Wpedantic -fPIC.

- The SIMD path auto-selects: NEON on __aarch64__, SSSE3 on __SSSE3__, otherwise a scalar 8-byte compaction loop. No configuration flag required.
- To regenerate the metadata tables after upgrading libphonenumber/resources/:

      python3 tools/gen_metadata.py \
          --xml ../libphonenumber/resources/PhoneNumberMetadata.xml \
          --out-dir generated

  The generator also consumes PhoneNumberAlternateFormats.xml, ShortNumberMetadata.xml, and geocoding/en/1.txt from the same directory.
- To restrict the build to a subset of regions (useful for embedded targets):

      python3 tools/gen_metadata.py --xml … --out-dir generated \
          --only TR US GB DE