diff --git a/translator_script.py b/translator_script.py index 17d03674c028c20294ba8491f08ef2a5696f5a37..9ce8817761118343e29b70b408a5f0a635f28dd1 100644 --- a/translator_script.py +++ b/translator_script.py @@ -93,11 +93,64 @@ def get_improved_translation_for_chunk(chunk, translation, temperature=1, previo print("API HAS BEEN CALLED!") # Call API chat_completion = openai.ChatCompletion.create( - model=GPT_MODEL, + model=GPT_MODEL, + n=3, messages=all_messages, temperature= temperature, #TODO test with top_p=0.8 - top_p=0.8 # set the top_p to default so we get slightly different versions + top_p=1 # set the top_p to default so we get slightly different versions + ) + + if 'choices' in chat_completion and chat_completion['choices']: + #print(chat_completion) + reviews = [] + for i, review in enumerate(chat_completion['choices']): + reviews.append(chat_completion['choices'][i]['message']['content']) + + return reviews + else: + return None + +# return most recent chatGPT answer. +def get_review_of_the_translations(chunk, translations, temperature=1): + """ + translations = string array of the (improved) translations + returns Text, more specifically the review of the translations. + """ + openai.api_key = os.getenv('API_KEY') + + # Add the original chunk to help the review of the translations + message = "<OriginalText>"+chunk+"<OriginalText>" + for i, review in enumerate(translations): + # TODO test with closing tags. + message = message + "\n" + "<Translation"+str(i)+">"+review+ "<Translation"+str(i)+">" +"\n" + #TODO make language depend on stuff + #message ="English: \n"+chunk + "\n\n" + "Finnish:\n" + translation + + all_messages = [] + # Add previous messages for chatGPT to use them as example. 
# Matches the text between an opening <FINAL_TRANSLATION> tag and the next
# closing tag. The slash is optional, so a reply that repeats the opening tag
# instead of closing it is still accepted.
_FINAL_TRANSLATION_RE = re.compile(
    r'<FINAL_TRANSLATION>(.*?)</?FINAL_TRANSLATION>', re.DOTALL
)

# Matches <BestVersion>...NUMBER...</BestVersion>. Non-digit text around the
# number is tolerated, the slash of the closing tag is optional, and the tag
# name is matched case-insensitively.
_BEST_VERSION_RE = re.compile(
    r"<BestVersion>\D*(\d+)\D*</?BestVersion>", re.IGNORECASE
)


def extract_final_translation(text):
    """
    Extract the content wrapped in <FINAL_TRANSLATION> tags.

    Parameters:
    - text: The model reply to search. Anything that is not a string
      (for example None) is treated as a reply without tags.

    Returns:
    - str: Extracted content from <FINAL_TRANSLATION> tags, or an empty
      string if the tags are not found.
    """
    if not isinstance(text, str):
        return ''

    # Searching for the pattern in the text.
    match = _FINAL_TRANSLATION_RE.search(text)

    # Returning the matched content if it exists, otherwise an empty string.
    return match.group(1) if match else ''


def extract_best_version_number(text, num_versions=None, default=0):
    """
    Extract the number of the best translation from a review.

    Parameters:
    - text: The review text. Should contain <BestVersion>NUMBER</BestVersion>,
      where the number is the best version of the translation. Anything that
      is not a string (for example None) is treated as a review without tags.
    - num_versions: Optional number of candidate translations. When given,
      a number outside range(num_versions) is rejected so the caller can
      safely use the result as a list index.
    - default: Value returned when no usable number is found.

    Returns:
    - int: The number inside the tags. If it is not found (or is out of
      range), prints a message and returns default (0 unless overridden).
    """
    match = _BEST_VERSION_RE.search(text) if isinstance(text, str) else None
    if match is None:
        print("<BestVersion> tags not found. THIS SHOULD NEVER HAPPEN")
        return default

    index = int(match.group(1))
    # \d+ can never produce a negative number, so only the upper bound and
    # the empty-list case need checking.
    if num_versions is not None and index >= num_versions:
        print("<BestVersion> number " + str(index) + " is out of range. Using " + str(default))
        return default
    return index


LATEX_GENERAL_TRANSLATION_PROMPT = "You are a translator. Translate material in the latex file to Finnish. Don't translate the comments. Do not alter the latex syntax, even if you deem it, for example, to miss some elements."
# System prompts sent to the chat model. Each definition sits on its own
# line so that a trailing comment cannot swallow the next constant.
GENERAL_TRANSLATION_PROMPT_PLAIN_TEXT_AND_MD = "You are a translator. Translate the material to English."

# Not thoroughly tested, but should work for basic usage.
TRANSLATE_AND_LOCALIZE_STUDY_MATERIAL_PROMPT_PLAIN_TEXT_OR_MD = (
    "You are a translator. "
    "Localize and translate the study materials to English. "
    "Keep the meaning of the exercise in translation, but it does not need to be literal translation. "
    "If there are Finnish names change them to names used in England. "
    "Keep every actor the same."
)

# Update the language (and the role) in the prompt to suit your needs.
MULTI_STEP_TRANSLATION_PROMPT = (
    "You are a translator and a software developer. "
    "Your native language is Finnish. "
    "Below you find text that is translated from English to Finnish. "
    "Review the translation. "
    "Focus on the fluency and accuracy of the text. "
    "First, explain your rationale. "
    "Then provide a final version in which you have added the improvements. "
    "Adapt the employed imageries and expressions to correspond more accurately with terms utilized in Finnish. "
    "Put the final version inside a custom tag <FINAL_TRANSLATION>."
)

# Prompt for the reviewing step that votes between candidate translations.
# NOTE(review): the candidates are sent as <Translation0>, <Translation1>, ...
# so the voted number is presumably meant as a 0-based index, but the prompt
# does not say so -- confirm the model answers with that numbering.
REVIEW_TRANSLATIONS_PROMPT = (
    "You are a translator and you speak fluent Finnish and English. "
    "Review the following translations. "
    "Explain your rationale. "
    "Focus on accuracy and fluency of the text. "
    "The original text is inside <OriginalText> tag. "
    "Finally, vote which is the best version and give its number inside a tag <BestVersion>."
)
# ------------ SET-UP ------------ # Set the initial prompt @@ -328,10 +395,20 @@ if file_content: #GET IMPROVED VERSION WITH TRATIONALE improved_trans_with_rationale = get_improved_translation_for_chunk(chunk, trans) - write_to_file("./debug/chunk" + str(i) + "_improved_translation_with_rationale.md", improved_trans_with_rationale) + only_improved_translations = [] + for a, review in enumerate(improved_trans_with_rationale): + write_to_file("./debug/chunk" + str(i) + "_improved_translation_with_rationale_choice_"+str(a)+".md", review) + only_improved_translations.append(extract_final_translation(improved_trans_with_rationale[a])) + #write_to_file("./debug/chunk" + str(i) + "_improved_translation_with_rationale.md", improved_trans_with_rationale) + + #Review the translations + review_of_translations = get_review_of_the_translations(chunk, only_improved_translations) + write_to_file("./debug/chunk"+str(i)+"_REVIEW.md", review_of_translations) + best_version_num = extract_best_version_number(review_of_translations) + final_translation_of_chunk = only_improved_translations[best_version_num] #EXTRACT THE FINAL TRANSLATION - final_translation_of_chunk = extract_final_translation(improved_trans_with_rationale) + #final_translation_of_chunk = extract_final_translation(improved_trans_with_rationale[0]) write_to_file("./debug/chunk"+str(i)+"_final_translation.md", final_translation_of_chunk) final_text = final_text + final_translation_of_chunk