@@ -610,12 +610,16 @@ int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, in
610610 prefix_len = 6 ; // IonTorrent
611611 * fixed_len = 6 ;
612612 * is_fixed = 1 ;
613- } else if (l > 37 && d [f + 8 ] == '-' && d [f + 13 ] == '-' && d [f + 18 ] == '-' && d [f + 23 ] == '-' &&
614- ((d [f + 0 ] >= '0' && d [f + 0 ] <='9' ) || (d [f + 0 ] >= 'a' && d [f + 0 ] <= 'f' )) &&
615- ((d [f + 35 ] >= '0' && d [f + 35 ] <='9' ) || (d [f + 35 ] >= 'a' && d [f + 35 ] <= 'f' ))) {
613+ } else if (l >= 36
614+ && d [f + 8 ]== '-' && d [f + 13 ]== '-' && d [f + 18 ]== '-' && d [f + 23 ]== '-'
615+ && isxdigit ((uint8_t )d [f + 0 ]) && isxdigit ((uint8_t )d [f + 7 ])
616+ && isxdigit ((uint8_t )d [f + 9 ]) && isxdigit ((uint8_t )d [f + 12 ])
617+ && isxdigit ((uint8_t )d [f + 14 ]) && isxdigit ((uint8_t )d [f + 17 ])
618+ && isxdigit ((uint8_t )d [f + 19 ]) && isxdigit ((uint8_t )d [f + 22 ])
619+ && isxdigit ((uint8_t )d [f + 24 ]) && isxdigit ((uint8_t )d [f + 35 ])) {
616620 // ONT: f33d30d5-6eb8-4115-8f46-154c2620a5da_Basecall_1D_template...
617- prefix_len = 37 ;
618- * fixed_len = 37 ;
621+ prefix_len = 36 ;
622+ * fixed_len = 36 ;
619623 * is_fixed = 1 ;
620624 } else {
621625 // Check Illumina and trim back to lane:tile:x:y.
@@ -638,7 +642,6 @@ int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, in
638642 * is_fixed = 0 ;
639643 }
640644 }
641- //prefix_len = INT_MAX;
642645
643646 if (!ctx -> t_head ) {
644647 ctx -> t_head = calloc (1 , sizeof (* ctx -> t_head ));
@@ -647,6 +650,7 @@ int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, in
647650 }
648651
649652 // Find an item in the trie
653+ int from_punct = from ;
650654 for (nlines = i = 0 ; i < len ; i ++ , nlines ++ ) {
651655 t = ctx -> t_head ;
652656 while (i < len && data [i ] > '\n' ) {
@@ -661,24 +665,18 @@ int search_trie(name_context *ctx, char *data, size_t len, int n, int *exact, in
661665 x = x -> sibling ;
662666 t = x ;
663667
664- // t = t->next[c];
665-
666- // if (!t)
667- // return -1;
668-
669668 from = t -> n ;
669+ if ((ispunct (c ) || isspace (c )) && t -> n != n )
670+ from_punct = t -> n ;
670671 if (i == prefix_len ) p3 = t -> n ;
671- //if (t->count >= .0035*ctx->t_head->count && t->n != n) p3 = t->n; // pacbio
672- //if (i == 60) p3 = t->n; // pacbio
673- //if (i == 7) p3 = t->n; // iontorrent
674672 t -> n = n ;
675673 }
676674 }
677675
678676 //printf("Looked for %d, found %d, prefix %d\n", n, from, p3);
679677
680678 * exact = (n != from ) && len ;
681- return * exact ? from : p3 ;
679+ return * exact ? from : ( p3 != -1 ? p3 : from_punct ) ;
682680}
683681
684682
@@ -731,17 +729,8 @@ static int encode_name(name_context *ctx, char *name, int len, int mode) {
731729 encode_token_diff (ctx , cnum - pnum );
732730 int ntok = 1 ;
733731
734- // Look for common form of UUID4 names and special case them
735- i = 0 ;
736- if (len == 36 ) {
737- for (i = 0 ; i < len ; i ++ ) {
738- if (!(isxdigit ((uint8_t )name [i ]) || name [i ] == '-' ))
739- break ;
740- }
741- }
742-
743- // Is uuid4 (eg ONT).
744- if (i == len ) {
732+ // ONT uuid4: identified in search_trie
733+ if (fixed_len == 36 ) {
745734 if (37 >= ctx -> max_tok ) {
746735 do {
747736 memset (& ctx -> desc [ctx -> max_tok << 4 ], 0 , 16 * sizeof (ctx -> desc [0 ]));
0 commit comments