import re

'''
Grammar reference
all_possible_match_strings = [
    "силка", "силки", "силки", "силок", "силці", "силкам", "силку",
    "силки", "силкою", "силками", "силці", "силках", "силко", "силки",
    "лінк", "лінка", "лінки", "лінку", "лінки", "лінок", "лінку", "лінці",
    "лінкам", "лінк", "лінку", "лінки", "лінком", "лінкою", "лінками",
    "лінку", "лінці", "лінках", "лінке", "лінко", "лінки"
]
'''

# Number of context characters shown on each side of a detected mistake.
EXTRACT_PADDING = 11
# If True, extend the context window so it never cuts a word in half.
EXTRACT_INCLUDE_WHOLE_WORDS = True

# Correct forms of "посилання"; the index doubles as the group ID used below.
corrections = [
    "посилання",     # 0
    "посиланню",     # 1
    "посиланням",    # 2
    "на посиланні",  # 3
    "посилань",      # 4
    "посиланнями",   # 5
    "посиланнях"     # 6
]

# Colloquial forms, grouped by the correction they map to (same index as `corrections`).
replacements = [
    ["силка", "силки", "силку", "силко", "лінк", "лінка", "лінки", "лінку", "лінке", "лінко"],
    ["силці", "лінку", "лінці"],
    ["силкам", "силкою", "лінкам", "лінком", "лінкою"],
    ["на силці", "на лінку", "на лінці"],
    ["силок", "лінок"],
    ["силками", "лінками"],
    ["силках", "лінках"]
]

#unique_match_strings = set(all_possible_match_strings)
#ua_alphabet = "абвгґдеєжзиіїйклмнопрстуфхцчшщьюя"
#regex_matchers = [re.compile(fr"((?<=[^{ua_alphabet}])|(?<=\b)|(?<=^)|(?<= )){i}((?=[^{ua_alphabet}])|(?=\b)|(?=$)|(?= ))", re.DEBUG)
#                  for i in unique_match_strings]

ua_alphabet = "абвгґдеєжзиіїйклмнопрстуфхцчшщьюя"

# Precompile a matcher for every colloquial form: the form must stand on its own,
# i.e. it may not be embedded inside a longer Ukrainian word.
for i, group in enumerate(replacements):
    for j, match_word in enumerate(group):
        replacements[i][j] = [
            match_word,
            re.compile(fr"((?<=[^{ua_alphabet}])|(?<=\b)|(?<=^)|(?<= ))"
                       fr"{match_word}"
                       fr"((?=[^{ua_alphabet}])|(?=\b)|(?=$)|(?= ))")
        ]
        #print(replacements[i][j])

#_ = [print(fr"(?<=[^абвгґдеєжзиіїйклмнопрстуфхцчшщьюя]){i}(?=[^абвгґдеєжзиіїйклмнопрстуфхцчшщьюя])") for i in unique_match_strings]


def process(message, path):
    """Search message.text for a colloquial form and return a
    '"…mistake…" -> "…correction…"' extract, or an empty string if nothing matched."""
    lowercase_message = message.text.lower()
    for correct_word_id, group in enumerate(replacements):
        for match_word, matcher in group:
            result = matcher.search(lowercase_message)
            if result:
                l = len(message.text)
                mistake_start = result.start()
                mistake_end = result.end()
                print(mistake_start, mistake_end)  # debug output

                original_text_before = message.text[max(mistake_start - EXTRACT_PADDING, 0):mistake_start]
                original_text_after = message.text[mistake_end:min(mistake_end + EXTRACT_PADDING, l)]
                original_text_mistake = message.text[mistake_start:mistake_end]

                if EXTRACT_INCLUDE_WHOLE_WORDS:
                    # Grow the window to the left until it no longer starts mid-word.
                    while 0 <= mistake_start - EXTRACT_PADDING - 1 < l and \
                            message.text[mistake_start - EXTRACT_PADDING - 1].isalnum():
                        mistake_start -= 1
                    original_text_before = message.text[max(mistake_start - EXTRACT_PADDING, 0):result.start()]
                    # Grow the window to the right until it no longer ends mid-word.
                    while 0 <= mistake_end + EXTRACT_PADDING < l and \
                            message.text[mistake_end + EXTRACT_PADDING].isalnum():
                        mistake_end += 1
                    original_text_after = message.text[result.end():min(mistake_end + EXTRACT_PADDING, l)]

                # Add ellipses when the extract does not reach the start/end of the message.
                if len(message.text[:mistake_start]) > EXTRACT_PADDING:
                    original_text_before_continue = "..."
                else:
                    original_text_before_continue = ""
                if len(message.text[mistake_end:]) > EXTRACT_PADDING:
                    original_text_after_continue = "..."
                else:
                    original_text_after_continue = ""

                original_extract = original_text_before_continue + original_text_before \
                    + original_text_mistake + original_text_after + original_text_after_continue

                # Mirror the capitalisation of the original mistake.
                correct_word = corrections[correct_word_id]
                if original_text_mistake == match_word.capitalize():
                    correct_word = corrections[correct_word_id].capitalize()
                elif original_text_mistake == match_word.upper():
                    correct_word = corrections[correct_word_id].upper()

                fixed_extract = original_text_before_continue + original_text_before \
                    + correct_word + original_text_after + original_text_after_continue

                return f'"{original_extract}" -> "{fixed_extract}" 🌚', None

    # No colloquial form found.
    return "", None
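

# --- Usage sketch (illustrative only, assumptions flagged below) ---
# `process` only relies on the message exposing a `.text` attribute, so a tiny
# stand-in object is enough to try it locally; `FakeMessage` is a hypothetical
# helper, not a real bot type, and `path` is passed as None since it is unused here.
if __name__ == "__main__":
    class FakeMessage:
        def __init__(self, text):
            self.text = text

    reply, _ = process(FakeMessage("Скинь мені силку на документ"), path=None)
    print(reply)
    # e.g. '"Скинь мені силку на документ" -> "Скинь мені посилання на документ" 🌚'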