From 27d1a671acc9c64f788b5d07cb4bd676e3f19805 Mon Sep 17 00:00:00 2001 From: Juuso Rytilahti <rytilahti.juuso@gmail.com> Date: Mon, 9 Oct 2023 16:08:38 +0300 Subject: [PATCH] Basic version of the reviewing working --- translator_script.py | 89 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 6 deletions(-) diff --git a/translator_script.py b/translator_script.py index 17d0367..9ce8817 100644 --- a/translator_script.py +++ b/translator_script.py @@ -93,11 +93,64 @@ def get_improved_translation_for_chunk(chunk, translation, temperature=1, previo print("API HAS BEEN CALLED!") # Call API chat_completion = openai.ChatCompletion.create( - model=GPT_MODEL, + model=GPT_MODEL, + n=3, messages=all_messages, temperature= temperature, #TODO test with top_p=0.8 - top_p=0.8 # set the top_p to default so we get slightly different versions + top_p=1 # set the top_p to default so we get slightly different versions + ) + + if 'choices' in chat_completion and chat_completion['choices']: + #print(chat_completion) + reviews = [] + for i, review in enumerate(chat_completion['choices']): + reviews.append(chat_completion['choices'][i]['message']['content']) + + return reviews + else: + return None + +# return most recent chatGPT answer. +def get_review_of_the_translations(chunk, translations, temperature=1): + """ + translations = string array of the (improved) translations + returns Text, more specifically the review of the translations. + """ + openai.api_key = os.getenv('API_KEY') + + # Add the original chunk to help the review of the translations + message = "<OriginalText>"+chunk+"<OriginalText>" + for i, review in enumerate(translations): + # TODO test with closing tags. + message = message + "\n" + "<Translation"+str(i)+">"+review+ "<Translation"+str(i)+">" +"\n" + #TODO make language depend on stuff + #message ="English: \n"+chunk + "\n\n" + "Finnish:\n" + translation + + all_messages = [] + # Add previous messages for chatGPT to use them as example. 
def extract_best_version_number(text):
    """
    Extract the number of the winning translation from a review text.

    The review is expected to contain <BestVersion>NUMBER</BestVersion>.
    Non-digit characters around the number (whitespace, words such as
    "Translation") are tolerated, the closing tag may lack its slash, and
    the tag name is matched case-insensitively.

    text: The review text. None or an empty string is treated the same
          as a review without a vote.
    return: int. The first number found inside the tags. If no number is
            found, prints a warning and returns 0.

    NOTE: the returned number is not range-checked here; callers that use
    it as a list index should verify that it is within bounds.
    """
    # A missing review must not crash the regex search below; fall through
    # to the same "not found" path that a review without tags takes.
    if not text:
        print("<BestVersion> tags not found. THIS SHOULD NEVER HAPPEN")
        return 0

    pattern = re.compile(
        r"<BestVersion>[^\d]*(\d+)[^\d]*</?BestVersion>",
        re.IGNORECASE,
    )
    match = pattern.search(text)
    if match is None:
        print("<BestVersion> tags not found. THIS SHOULD NEVER HAPPEN")
        return 0

    return int(match.group(1))
GENERAL_TRANSLATION_PROMPT_PLAIN_TEXT_AND_MD = "You are a translator. Translate the material to English." # Not thoroughly tested, but should work for basic usage. TRANSLATE_AND_LOCALIZE_STUDY_MATERIAL_PROMPT_PLAIN_TEXT_OR_MD = "You are a translator. Localize and translate the study materials to English. Keep the meaning of the exercise in translation, but it does not need to be literal translation. If there are Finnish names change them to names used in England. Keep every actor the same." # Update the language (and the role) in the prompt to suit your needs. -MULTI_STEP_TRANSLATION_PROMPT = "You are a translator and a software developer. Your native language is Finnish. Below you find text that is translated from English to Finnish. Review the translation. Focus on the fluency and accuracy of the text. First, explain your rationale. Then provide a final version in which you have added the improvements. Put the final version inside a custom tag <FINAL_TRANSLATION>." +MULTI_STEP_TRANSLATION_PROMPT = "You are a translator and a software developer. Your native language is Finnish. Below you find text that is translated from English to Finnish. Review the translation. Focus on the fluency and accuracy of the text. First, explain your rationale. Then provide a final version in which you have added the improvements. Adapt the employed imageries and expressions to correspond more accurately with terms utilized in Finnish. Put the final version inside a custom tag <FINAL_TRANSLATION>." +REVIEW_TRANSLATIONS_PROMPT ="You are a translator and you speak fluent Finnish and English. Review the following translations. Explain your rationale. Focus on accuracy and fluency of the text. The original text is inside <OriginalText> tag. Finally, vote which is the best version and give its number inside a tag <BestVersion>." 
# ------------ SET-UP ------------ # Set the initial prompt @@ -328,10 +395,20 @@ if file_content: #GET IMPROVED VERSION WITH TRATIONALE improved_trans_with_rationale = get_improved_translation_for_chunk(chunk, trans) - write_to_file("./debug/chunk" + str(i) + "_improved_translation_with_rationale.md", improved_trans_with_rationale) + only_improved_translations = [] + for a, review in enumerate(improved_trans_with_rationale): + write_to_file("./debug/chunk" + str(i) + "_improved_translation_with_rationale_choice_"+str(a)+".md", review) + only_improved_translations.append(extract_final_translation(improved_trans_with_rationale[a])) + #write_to_file("./debug/chunk" + str(i) + "_improved_translation_with_rationale.md", improved_trans_with_rationale) + + #Review the translations + review_of_translations = get_review_of_the_translations(chunk, only_improved_translations) + write_to_file("./debug/chunk"+str(i)+"_REVIEW.md", review_of_translations) + best_version_num = extract_best_version_number(review_of_translations) + final_translation_of_chunk = only_improved_translations[best_version_num] #EXTRACT THE FINAL TRANSLATION - final_translation_of_chunk = extract_final_translation(improved_trans_with_rationale) + #final_translation_of_chunk = extract_final_translation(improved_trans_with_rationale[0]) write_to_file("./debug/chunk"+str(i)+"_final_translation.md", final_translation_of_chunk) final_text = final_text + final_translation_of_chunk -- GitLab