Source code for chatbot.nlp.entity_analyzer

from chatbot.common.chat_share_data import ShareData
# from konlpy.tag import Kkma
# from konlpy.tag import Twitter
from chatbot.common.chat_knowledge_mem_dict import ChatKnowledgeMemDict
from konlpy.tag import Mecab
import logging

class EntityAnalyzer(ShareData):
    """Parse raw text into tagged, entity-filtered tokens.

    Example:
        input  : I bought a car yesterday
        output : I bought a car [time]
    """

    # POS tags whose tokens are looked up in the entity dictionaries
    # (the original comment reads "check only noun").
    _ENTITY_CANDIDATE_TAGS = ('NNG', 'NNP', 'SL')

    # POS tags whose tokens are dropped from the parse result entirely.
    _DROPPED_TAGS = (
        'SY', 'EC', 'EP', 'VA', 'VX', 'XSV+EC', 'VX+EC', 'VX+EF', 'SF',
        'VCP+EF', 'ETN', 'ETM', 'JKO', 'EF', 'VCP+EC', 'SSO', 'SSC', 'EP+EF',
    )

    def __init__(self, cb_id):
        """Load the entity dictionaries registered for one chatbot.

        :param cb_id: key used to look up this chatbot's entries in
            ``ChatKnowledgeMemDict``.
        """
        self.cb_id = cb_id
        # Entity keys in lookup order; the first key that matches a token
        # wins. NOTE(review): presumably priority-ordered (an earlier,
        # commented-out revision sorted the keys by proper_noun[key][0])
        # -- confirm against ChatKnowledgeMemDict.
        self.proper_key_list = ChatKnowledgeMemDict.data_order.get(self.cb_id)
        # key : [values]
        self.proper_noun = ChatKnowledgeMemDict.data.get(self.cb_id).get('proper_noun')

    def parse(self, share_data):
        """Tokenise the request text and store the results on ``share_data``.

        Every (token, tag) pair from the POS tagger is passed through
        ``_preprocess_data``; pairs whose token comes back empty are
        discarded. The surviving tokens are stored with
        ``set_morphed_data`` and their entity-converted counterparts with
        ``set_convert_dict_data``.

        :param share_data: object providing ``get_request_data`` and the
            setters used below.
        :return: the same ``share_data`` object, updated in place.
        :raises Exception: wraps any error raised during analysis; the
            original error is attached as ``__cause__``.
        """
        try:
            input_data = share_data.get_request_data()
            pos_tags = self._pos_tagger(input_data)
            logging.info("■■■■■■■■■■ 형태소 분석 결과 : " + str(pos_tags))

            processed = [self._preprocess_data(share_data, tagged) for tagged in pos_tags]
            # Drop the tokens that _preprocess_data blanked out.
            kept = [pair for pair in processed if pair[0] != ""]
            convert_dict_data = [pair[1] for pair in kept]
            morphed_data = [pair[0] for pair in kept]

            share_data.set_convert_dict_data(convert_dict_data)
            share_data.set_morphed_data(morphed_data)
            logging.info("■■■■■■■■■■ Entity 분석 결과 : " + str(convert_dict_data))
            return share_data
        except Exception as e:
            raise Exception("error on entity anal : {0}".format(e)) from e

    def _preprocess_data(self, share_data, pos_tags):
        """Map one (token, tag) pair to ``(token, converted_token)``.

        * Tokens tagged with one of ``_ENTITY_CANDIDATE_TAGS`` and longer
          than one character are looked up in the entity dictionaries. On
          a hit, the slot for that entity key is written to ``share_data``
          and the entity key becomes the converted token.
        * Tokens tagged with one of ``_DROPPED_TAGS`` yield ``("", "")``.
        * Everything else is returned unchanged as ``(token, token)``.

        NOTE(review): the original source carried the remark
        'Custom Case : ex)"hi and hello" and len < 3' above this method;
        the code itself only skips candidate tokens of length <= 1.

        :param share_data: object providing ``get_story_slot_entity`` and
            ``set_story_slot_entity``.
        :param pos_tags: indexable pair whose item 0 is the token text and
            item 1 its POS tag.
        :return: tuple ``(token, converted_token)``.
        """
        token = pos_tags[0]
        tag = pos_tags[1]

        if tag in self._ENTITY_CANDIDATE_TAGS and len(token) > 1:
            entity_key = self._find_entity_key(token)
            if entity_key is None:
                return token, token

            slot_value = token
            # When the third field of the entity definition is truthy, the
            # new token is appended to the first value already stored for
            # this slot instead of replacing it.
            if self.proper_noun[entity_key][2]:
                stored = share_data.get_story_slot_entity(entity_key)
                if stored is not None:
                    slot_value = stored[0] + " " + token
            share_data.set_story_slot_entity(entity_key, [slot_value])
            return token, entity_key

        if tag in self._DROPPED_TAGS:
            return "", ""

        return token, token

    def _find_entity_key(self, token):
        """Return the first key in ``proper_key_list`` matching ``token``, or None."""
        return next(
            (key for key in self.proper_key_list
             if self._extract_proper_entity(token, key)),
            None,
        )

    def _pos_tagger(self, input, type='mecab'):
        """Run the POS tagger over ``input``.

        :param input: text to analyse; converted with ``str()`` first.
        :param type: tagger backend name; only ``'mecab'`` is supported.
        :return: the result of ``Mecab.pos`` for the text.
        :raises ValueError: if ``type`` names an unsupported backend
            (previously this case silently returned ``None``).
        """
        if type == 'mecab':
            mecab = Mecab('/usr/local/lib/mecab/dic/mecab-ko-dic')
            return mecab.pos(str(input))
        raise ValueError("unsupported pos tagger type : {0}".format(type))

    def _extract_proper_entity(self, value, key):
        """Tell whether ``value`` matches an entry of the entity list ``key``.

        Comparison is case-insensitive and ignores surrounding whitespace
        of each entry. If the third field of ``proper_noun[key]`` is
        truthy, ``value`` only needs to occur inside an entry; otherwise
        the entry must equal ``value`` exactly.

        :param value: token text to look for.
        :param key: entity key whose entry list is searched.
        :return: ``True`` on a match; ``False`` otherwise, including when
            no entry list exists for ``key``.
        """
        entries = ChatKnowledgeMemDict.data.get(self.cb_id).get(key)
        if not entries:
            return False

        needle = value.lower()
        allow_partial = self.proper_noun.get(key)[2]
        for entry in entries:
            candidate = entry.lower().strip()
            if candidate == needle or (allow_partial and needle in candidate):
                return True
        return False