Skip to content

Commit c820814

Browse files
committed
OPM
1 parent 9e29ed1 commit c820814

5 files changed

Lines changed: 558 additions & 98 deletions

File tree

nebulento/__init__.py

Lines changed: 2 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,97 +1,2 @@
1-
import logging
2-
from nebulento.fuzz import MatchStrategy, match_one
3-
from nebulento.bracket_expansion import expand_template, expand_slots
4-
import quebra_frases
5-
6-
LOG = logging.getLogger('nebulento')
7-
8-
9-
class IntentContainer:
    """Fuzzy-matching intent engine.

    Intents and entities are registered as lists of sample templates. Every
    template is expanded with ``expand_template`` and, when ``ignore_case``
    is set, lower-cased. Queries are scored against each registered intent
    with ``match_one`` using the configured strategy.
    """

    def __init__(self, fuzzy_strategy=MatchStrategy.DAMERAU_LEVENSHTEIN_SIMILARITY,
                 ignore_case=True):
        """
        Args:
            fuzzy_strategy (MatchStrategy): strategy forwarded to ``match_one``.
            ignore_case (bool): lower-case samples and queries before matching.
        """
        self.fuzzy_strategy = fuzzy_strategy
        self.ignore_case = ignore_case
        self.registered_intents = {}
        self.registered_entities = {}

    def _expand(self, lines):
        """Expand every template in ``lines``; lower-case if ``ignore_case``."""
        expanded = []
        for line in lines:
            expanded += expand_template(line)
        if self.ignore_case:
            expanded = [sample.lower() for sample in expanded]
        return expanded

    def _no_match(self, query):
        """Build the result returned when no intent is registered/matched.

        Carries the same keys (and value types) as the dicts yielded by
        ``match_fuzzy`` so callers can read any key unconditionally.
        """
        return {"best_match": None,
                "conf": 0,
                "entities": {},
                "match_strategy": self.fuzzy_strategy.name,
                "utterance": query,
                # nothing was consumed, so the whole query remains
                "utterance_remainder": query,
                "utterance_consumed": "",
                "name": None}

    def match_entities(self, sentence):
        """Find registered entity samples present in ``sentence``.

        Args:
            sentence (str): the query to inspect.

        Returns:
            dict: entity name -> list of that entity's samples that appear
            among the chunks produced by ``quebra_frases.chunk``. Entities
            without a hit map to an empty list.
        """
        if self.ignore_case:
            sentence = sentence.lower()
        matches = {}
        for entity, samples in self.registered_entities.items():
            chunked = quebra_frases.chunk(sentence, samples)
            matches[entity] = [s for s in samples if s in chunked]
        return matches

    def match_fuzzy(self, sentence):
        """Score ``sentence`` against every registered intent.

        Args:
            sentence (str): the query to match.

        Yields:
            dict: one result per intent with the keys ``best_match``,
            ``conf``, ``entities``, ``match_strategy``, ``utterance``,
            ``utterance_remainder``, ``utterance_consumed`` and ``name``.
        """
        if self.ignore_case:
            sentence = sentence.lower()
        entities = self.match_entities(sentence)
        # The query is the same for every intent: tokenize it only once.
        query_tokens = list(quebra_frases.word_tokenize(sentence))

        for intent, samples in self.registered_intents.items():
            if not samples:
                # Nothing to compare against; skip instead of handing
                # match_one an empty candidate list.
                continue

            sent, score = match_one(sentence, samples,
                                    strategy=self.fuzzy_strategy)
            # Tokenize the winning sample once instead of once per query word.
            sample_tokens = set(quebra_frases.word_tokenize(sent))
            remainder = [w for w in query_tokens if w not in sample_tokens]
            consumed = [w for w in query_tokens if w in sample_tokens]

            tagged_entities = {}
            for ent, values in entities.items():
                # Only entities that some sample references as a {slot}
                # count towards this intent.
                if values and any("{" + ent + "}" in s for s in samples):
                    # Each matched entity moves the score towards 1.
                    score = 0.25 + score * 0.75
                    tagged_entities[ent] = values
                    consumed += [v for v in values if v not in consumed]
                    remainder = [w for w in remainder if w not in values]

            yield {"best_match": sent,
                   "conf": min(score, 1),
                   "entities": tagged_entities,
                   "match_strategy": self.fuzzy_strategy.name,
                   "utterance": sentence,
                   "utterance_remainder": " ".join(remainder),
                   "utterance_consumed": " ".join(consumed),
                   "name": intent}

    def add_intent(self, name, lines):
        """Register (or replace) intent ``name`` with the expanded ``lines``."""
        self.registered_intents[name] = self._expand(lines)

    def remove_intent(self, name):
        """Unregister intent ``name``; unknown names are ignored."""
        self.registered_intents.pop(name, None)

    def add_entity(self, name, lines):
        """Register (or replace) entity ``name`` with the expanded ``lines``."""
        self.registered_entities[name] = self._expand(lines)

    def remove_entity(self, name):
        """Unregister entity ``name``; unknown names are ignored."""
        self.registered_entities.pop(name, None)

    def calc_intents(self, query):
        """Yield one match result per registered intent for ``query``."""
        yield from self.match_fuzzy(query)

    def calc_intent(self, query):
        """Return the highest-confidence match for ``query``.

        Returns:
            dict: the best result from ``calc_intents``, or a no-match
            result (``name`` and ``best_match`` are ``None``, ``conf`` is 0)
            when there is nothing to match against.
        """
        return max(self.calc_intents(query),
                   key=lambda match: match["conf"],
                   default=self._no_match(query))
1+
from nebulento.container import IntentContainer
2+
from nebulento.domain_engine import DomainIntentContainer

nebulento/container.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import logging
2+
from nebulento.fuzz import MatchStrategy, match_one
3+
from nebulento.bracket_expansion import expand_template, expand_slots
4+
import quebra_frases
5+
6+
LOG = logging.getLogger('nebulento')
7+
8+
9+
class IntentContainer:
    """Fuzzy-matching intent engine.

    Intents and entities are registered as lists of sample templates. Every
    template is expanded with ``expand_template`` and, when ``ignore_case``
    is set, lower-cased. Queries are scored against each registered intent
    with ``match_one`` using the configured strategy.
    """

    def __init__(self, fuzzy_strategy=MatchStrategy.DAMERAU_LEVENSHTEIN_SIMILARITY,
                 ignore_case=True):
        """
        Args:
            fuzzy_strategy (MatchStrategy): strategy forwarded to ``match_one``.
            ignore_case (bool): lower-case samples and queries before matching.
        """
        self.fuzzy_strategy = fuzzy_strategy
        self.ignore_case = ignore_case
        self.registered_intents = {}
        self.registered_entities = {}

    @property
    def intent_names(self):
        """Names of all currently registered intents, as a new list."""
        return list(self.registered_intents)

    def _expand(self, lines):
        """Expand every template in ``lines``; lower-case if ``ignore_case``."""
        expanded = []
        for line in lines:
            expanded += expand_template(line)
        if self.ignore_case:
            expanded = [sample.lower() for sample in expanded]
        return expanded

    def _no_match(self, query):
        """Build the result returned when no intent is registered/matched.

        Carries the same keys (and value types) as the dicts yielded by
        ``match_fuzzy`` so callers can read any key unconditionally.
        """
        return {"best_match": None,
                "conf": 0,
                "entities": {},
                "match_strategy": self.fuzzy_strategy.name,
                "utterance": query,
                # nothing was consumed, so the whole query remains
                "utterance_remainder": query,
                "utterance_consumed": "",
                "name": None}

    def match_entities(self, sentence):
        """Find registered entity samples present in ``sentence``.

        Args:
            sentence (str): the query to inspect.

        Returns:
            dict: entity name -> list of that entity's samples that appear
            among the chunks produced by ``quebra_frases.chunk``. Entities
            without a hit map to an empty list.
        """
        if self.ignore_case:
            sentence = sentence.lower()
        matches = {}
        for entity, samples in self.registered_entities.items():
            chunked = quebra_frases.chunk(sentence, samples)
            matches[entity] = [s for s in samples if s in chunked]
        return matches

    def match_fuzzy(self, sentence):
        """Score ``sentence`` against every registered intent.

        Args:
            sentence (str): the query to match.

        Yields:
            dict: one result per intent with the keys ``best_match``,
            ``conf``, ``entities``, ``match_strategy``, ``utterance``,
            ``utterance_remainder``, ``utterance_consumed`` and ``name``.
        """
        if self.ignore_case:
            sentence = sentence.lower()
        entities = self.match_entities(sentence)
        # The query is the same for every intent: tokenize it only once.
        query_tokens = list(quebra_frases.word_tokenize(sentence))

        for intent, samples in self.registered_intents.items():
            if not samples:
                # Nothing to compare against; skip instead of handing
                # match_one an empty candidate list.
                continue

            sent, score = match_one(sentence, samples,
                                    strategy=self.fuzzy_strategy)
            # Tokenize the winning sample once instead of once per query word.
            sample_tokens = set(quebra_frases.word_tokenize(sent))
            remainder = [w for w in query_tokens if w not in sample_tokens]
            consumed = [w for w in query_tokens if w in sample_tokens]

            tagged_entities = {}
            for ent, values in entities.items():
                # Only entities that some sample references as a {slot}
                # count towards this intent.
                if values and any("{" + ent + "}" in s for s in samples):
                    # Each matched entity moves the score towards 1.
                    score = 0.25 + score * 0.75
                    tagged_entities[ent] = values
                    consumed += [v for v in values if v not in consumed]
                    remainder = [w for w in remainder if w not in values]

            yield {"best_match": sent,
                   "conf": min(score, 1),
                   "entities": tagged_entities,
                   "match_strategy": self.fuzzy_strategy.name,
                   "utterance": sentence,
                   "utterance_remainder": " ".join(remainder),
                   "utterance_consumed": " ".join(consumed),
                   "name": intent}

    def add_intent(self, name, lines):
        """Register (or replace) intent ``name`` with the expanded ``lines``."""
        self.registered_intents[name] = self._expand(lines)

    def remove_intent(self, name):
        """Unregister intent ``name``; unknown names are ignored."""
        self.registered_intents.pop(name, None)

    def add_entity(self, name, lines):
        """Register (or replace) entity ``name`` with the expanded ``lines``."""
        self.registered_entities[name] = self._expand(lines)

    def remove_entity(self, name):
        """Unregister entity ``name``; unknown names are ignored."""
        self.registered_entities.pop(name, None)

    def calc_intents(self, query):
        """Yield one match result per registered intent for ``query``."""
        yield from self.match_fuzzy(query)

    def calc_intent(self, query):
        """Return the highest-confidence match for ``query``.

        Returns:
            dict: the best result from ``calc_intents``, or a no-match
            result (``name`` and ``best_match`` are ``None``, ``conf`` is 0)
            when there is nothing to match against.
        """
        return max(self.calc_intents(query),
                   key=lambda match: match["conf"],
                   default=self._no_match(query))

nebulento/domain_engine.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
from collections import defaultdict
2+
from typing import Dict, List, Optional
3+
4+
from nebulento.container import IntentContainer
5+
from nebulento.fuzz import MatchStrategy
6+
7+
8+
class DomainIntentContainer:
    """
    A domain-aware intent recognition engine that organizes intents and entities
    into specific domains, providing flexible and hierarchical intent matching.

    A query is first classified into a domain by ``domain_engine`` (each
    domain is registered there as one intent built from all of the domain's
    samples) and then matched against that domain's own ``IntentContainer``.
    """

    def __init__(self, fuzzy_strategy=MatchStrategy.DAMERAU_LEVENSHTEIN_SIMILARITY,
                 ignore_case=True):
        """
        Initialize the DomainIntentContainer.

        Args:
            fuzzy_strategy (MatchStrategy): strategy handed to every IntentContainer.
            ignore_case (bool): forwarded to every IntentContainer.

        Attributes:
            domain_engine (IntentContainer): A top-level intent container for cross-domain calculations.
            domains (Dict[str, IntentContainer]): A mapping of domain names to their respective intent containers.
            training_data (Dict[str, List[str]]): A mapping of domain names to their associated training samples.
            must_train (bool): True while ``domain_engine`` is out of date with ``training_data``.
        """
        self.fuzzy_strategy = fuzzy_strategy
        self.ignore_case = ignore_case
        self.domain_engine = IntentContainer(fuzzy_strategy=fuzzy_strategy, ignore_case=ignore_case)
        self.domains: Dict[str, IntentContainer] = {}
        self.training_data: Dict[str, List[str]] = defaultdict(list)
        # domain -> intent -> samples; lets a single intent's samples be
        # dropped from training_data when that intent is removed/replaced.
        self._intent_samples: Dict[str, Dict[str, List[str]]] = defaultdict(dict)
        self.must_train = True

    def _get_domain(self, domain_name: str) -> IntentContainer:
        """Return the container for ``domain_name``, creating it on first use."""
        if domain_name not in self.domains:
            self.domains[domain_name] = IntentContainer(fuzzy_strategy=self.fuzzy_strategy,
                                                        ignore_case=self.ignore_case)
        return self.domains[domain_name]

    def _rebuild_training_data(self, domain_name: str):
        """Recompute ``training_data[domain_name]`` from the per-intent samples."""
        self.training_data[domain_name] = [
            sample
            for samples in self._intent_samples[domain_name].values()
            for sample in samples
        ]
        self.must_train = True

    def _no_match(self, query: str) -> dict:
        """Build the result returned when no domain/intent matches ``query``."""
        return {"best_match": None,
                "conf": 0,
                "entities": {},
                "match_strategy": self.fuzzy_strategy.name,
                "utterance": query,
                # nothing was consumed, so the whole query remains
                "utterance_remainder": query,
                "utterance_consumed": "",
                "name": None}

    def train(self):
        """
        Synchronize ``domain_engine`` with ``training_data``.

        Every domain with samples is (re)registered as an intent of the
        domain classifier; domains left without samples are unregistered.
        """
        for domain_name, samples in list(self.training_data.items()):
            if samples:
                self.domain_engine.add_intent(domain_name, samples)
            else:
                self.domain_engine.remove_intent(domain_name)
        self.must_train = False

    def remove_domain(self, domain_name: str):
        """
        Remove a domain and its associated intents and training data.

        Args:
            domain_name (str): The name of the domain to remove.
        """
        self.training_data.pop(domain_name, None)
        self._intent_samples.pop(domain_name, None)
        self.domains.pop(domain_name, None)
        # remove_intent ignores names that were never registered
        self.domain_engine.remove_intent(domain_name)

    def register_domain_intent(self, domain_name: str, intent_name: str, intent_samples: List[str]):
        """
        Register an intent within a specific domain.

        Registering an existing intent name again replaces its samples.

        Args:
            domain_name (str): The name of the domain.
            intent_name (str): The name of the intent to register.
            intent_samples (List[str]): A list of sample sentences for the intent.
        """
        self._get_domain(domain_name).add_intent(intent_name, intent_samples)
        self._intent_samples[domain_name][intent_name] = list(intent_samples)
        self._rebuild_training_data(domain_name)

    def remove_domain_intent(self, domain_name: str, intent_name: str):
        """
        Remove a specific intent from a domain.

        The intent's samples are also dropped from the domain's training
        data so the domain classifier stops matching on them.

        Args:
            domain_name (str): The name of the domain.
            intent_name (str): The name of the intent to remove.
        """
        if domain_name not in self.domains:
            return
        self.domains[domain_name].remove_intent(intent_name)
        if intent_name in self._intent_samples[domain_name]:
            del self._intent_samples[domain_name][intent_name]
            self._rebuild_training_data(domain_name)

    def register_domain_entity(self, domain_name: str, entity_name: str, entity_samples: List[str]):
        """
        Register an entity within a specific domain.

        Args:
            domain_name (str): The name of the domain.
            entity_name (str): The name of the entity to register.
            entity_samples (List[str]): A list of sample phrases for the entity.
        """
        self._get_domain(domain_name).add_entity(entity_name, entity_samples)

    def remove_domain_entity(self, domain_name: str, entity_name: str):
        """
        Remove a specific entity from a domain.

        Args:
            domain_name (str): The name of the domain.
            entity_name (str): The name of the entity to remove.
        """
        if domain_name in self.domains:
            self.domains[domain_name].remove_entity(entity_name)

    def calc_domain(self, query: str):
        """
        Calculate the best matching domain for a query.

        Args:
            query (str): The input query.

        Returns:
            dict: The match result of the domain classifier; its ``"name"``
            entry is the domain name, or None when nothing matched.
        """
        if self.must_train:
            # Lazily push pending registrations into the domain classifier.
            self.train()
        return self.domain_engine.calc_intent(query)

    def calc_intent(self, query: str, domain: Optional[str] = None):
        """
        Calculate the best matching intent for a query within a specific domain.

        Args:
            query (str): The input query.
            domain (Optional[str]): The domain to limit the search to. When
                omitted, the domain is predicted with ``calc_domain``.

        Returns:
            dict: The best matching intent of the selected domain, or a
            no-match result (``"name"`` is None, ``"conf"`` is 0) when the
            domain is unknown.
        """
        # IntentContainer.calc_intent returns a dict, so read the key.
        domain = domain or self.calc_domain(query)["name"]
        if domain in self.domains:
            return self.domains[domain].calc_intent(query)
        return self._no_match(query)

0 commit comments

Comments
 (0)