def counts(self):
    """Count how often each word occurs in the corpus file.

    The file at ``self.FILENAME`` is read as UTF-8 and lower-cased; every
    maximal run of the letters ``a``-``z`` is then treated as one word, so
    digits, punctuation and non-ASCII characters act as separators.

    Returns:
        A ``Counter`` mapping each word to its number of occurrences.
    """
    with open(self.FILENAME, "r", encoding="utf-8") as f:
        data = f.read()
    # Lower-case first so "The" and "the" are counted as the same word.
    words = re.findall("[a-z]+", data.lower())
    return Counter(words)
def change_one(self, word):
    """Return every string that is exactly one simple edit away from ``word``.

    Four kinds of edit are generated:

    * deletion of one character,
    * replacement of one character by a letter from ``self.ZIMU``,
    * insertion of a letter from ``self.ZIMU`` at any position
      (including before the first and after the last character),
    * transposition of two adjacent characters.

    The result is not filtered against the corpus, and it can contain
    ``word`` itself (for example when a character is replaced by itself).

    Args:
        word: The string to edit.

    Returns:
        A set of candidate strings.
    """
    # NOTE(review): self.ZIMU is only iterated here; presumably it holds the
    # lowercase alphabet - verify where it is defined.
    letters = self.ZIMU

    # Every way of cutting the word into a (head, tail) pair; each edit below
    # operates on the first character(s) of the tail.
    splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]

    deletes = {head + tail[1:] for head, tail in splits if tail}
    replaces = {
        head + c + tail[1:]
        for head, tail in splits
        if tail
        for c in letters
    }
    inserts = {head + c + tail for head, tail in splits for c in letters}
    transposes = {
        head + tail[1] + tail[0] + tail[2:]
        for head, tail in splits
        if len(tail) > 1
    }
    return deletes | replaces | inserts | transposes
def change_two(self, word):
    """Return the corpus words reachable from ``word`` by two edits.

    ``self.change_one`` is applied to ``word`` and then again to each of the
    resulting strings; only results that are keys of ``self.WORDSCOUNT`` are
    kept.

    Args:
        word: The (possibly misspelled) word to start from.

    Returns:
        A set of known words two edits away from ``word``.
    """
    # The first pass must start from ``word``; starting from the corpus
    # counter would ignore the argument entirely.
    return {
        word_two
        for word_one in self.change_one(word)
        for word_two in self.change_one(word_one)
        if word_two in self.WORDSCOUNT
    }
def is_true_words(self, word):
    """Keep only the candidates that occur in the corpus.

    Args:
        word: An iterable of candidate words (despite the singular name).

    Returns:
        The set of candidates that are keys of ``self.WORDSCOUNT``; empty
        when none of them is known.
    """
    return {w for w in word if w in self.WORDSCOUNT}