Skip to content
Snippets Groups Projects
Commit 27d1a671 authored by Juuso Rytilahti's avatar Juuso Rytilahti
Browse files

Basic version of the reviewing working

parent 34bce05d
No related branches found
No related tags found
No related merge requests found
...@@ -94,10 +94,63 @@ def get_improved_translation_for_chunk(chunk, translation, temperature=1, previo ...@@ -94,10 +94,63 @@ def get_improved_translation_for_chunk(chunk, translation, temperature=1, previo
# Call API # Call API
chat_completion = openai.ChatCompletion.create( chat_completion = openai.ChatCompletion.create(
model=GPT_MODEL, model=GPT_MODEL,
n=3,
messages=all_messages, messages=all_messages,
temperature= temperature, temperature= temperature,
#TODO test with top_p=0.8 #TODO test with top_p=0.8
top_p=0.8 # set the top_p to default so we get slightly different versions top_p=1 # set the top_p to default so we get slightly different versions
)
if 'choices' in chat_completion and chat_completion['choices']:
#print(chat_completion)
reviews = []
for i, review in enumerate(chat_completion['choices']):
reviews.append(chat_completion['choices'][i]['message']['content'])
return reviews
else:
return None
# return most recent chatGPT answer.
def get_review_of_the_translations(chunk, translations, temperature=1):
"""
translations = string array of the (improved) translations
returns Text, more specifically the review of the translations.
"""
openai.api_key = os.getenv('API_KEY')
# Add the original chunk to help the review of the translations
message = "<OriginalText>"+chunk+"<OriginalText>"
for i, review in enumerate(translations):
# TODO test with closing tags.
message = message + "\n" + "<Translation"+str(i)+">"+review+ "<Translation"+str(i)+">" +"\n"
#TODO make language depend on stuff
#message ="English: \n"+chunk + "\n\n" + "Finnish:\n" + translation
all_messages = []
# Add previous messages for chatGPT to use them as example.
#if(previous_messages):
# all_messages = all_messages + previous_messages
all_messages.append({
"role": "system",
"content": REVIEW_TRANSLATIONS_PROMPT
})
# Add new message to the end
all_messages.append({
"role": "user",
"content": message
})
print("API HAS BEEN CALLED!")
# Call API
chat_completion = openai.ChatCompletion.create(
model=GPT_MODEL,
messages=all_messages,
temperature= 0.85,
#TODO test with top_p=0.8
top_p=1 # set the top_p to default so we get slightly different versions
) )
if 'choices' in chat_completion and chat_completion['choices']: if 'choices' in chat_completion and chat_completion['choices']:
...@@ -255,7 +308,7 @@ def extract_final_translation(text): ...@@ -255,7 +308,7 @@ def extract_final_translation(text):
- str: Extracted content from <FINAL_TRANSLATION> tags. - str: Extracted content from <FINAL_TRANSLATION> tags.
""" """
# Regular expression pattern to find content inside <FINAL_TRANSLATION> tags. # Regular expression pattern to find content inside <FINAL_TRANSLATION> tags.
pattern = re.compile(r'<FINAL_TRANSLATION>(.*?)<\/FINAL_TRANSLATION>', re.DOTALL) pattern = re.compile(r'<FINAL_TRANSLATION>(.*?)<(/?FINAL_TRANSLATION>)', re.DOTALL)
# Searching for the pattern in the text. # Searching for the pattern in the text.
match = pattern.search(text) match = pattern.search(text)
...@@ -263,13 +316,27 @@ def extract_final_translation(text): ...@@ -263,13 +316,27 @@ def extract_final_translation(text):
# Returning the matched content if it exists, otherwise an empty string. # Returning the matched content if it exists, otherwise an empty string.
return match.group(1) if match else '' return match.group(1) if match else ''
def extract_best_version_number(text):
    """
    Extract the number of the best translation from a review text.

    text: The review text. Should contain <BestVersion>NUMBER</BestVersion>,
        where NUMBER is the number of the best version of the translation.
        Non-digit characters around the number are ignored, and the closing
        tag is also accepted without its slash.
    return: int. If text is not a string (e.g. None) or no number is found
        inside the tags, a warning is printed and 0 is returned.
    """
    match = None
    # Guard against non-string input (e.g. None): re would raise TypeError,
    # whereas the documented behaviour is to fall back to version 0.
    if isinstance(text, str):
        pattern = re.compile(r"<BestVersion>[^\d]*(\d+)[^\d]*</?BestVersion>")
        match = pattern.search(text)
    if match:
        return int(match.group(1))
    print("<BestVersion> tags not found. THIS SHOULD NEVER HAPPEN")
    return 0
LATEX_GENERAL_TRANSLATION_PROMPT = "You are a translator. Translate material in the latex file to Finnish. Don't translate the comments. Do not alter the latex syntax, even if you deem it, for example, to miss some elements." LATEX_GENERAL_TRANSLATION_PROMPT = "You are a translator. Translate material in the latex file to Finnish. Don't translate the comments. Do not alter the latex syntax, even if you deem it, for example, to miss some elements."
GENERAL_TRANSLATION_PROMPT_PLAIN_TEXT_AND_MD = "You are a translator. Translate the material to English." # Not thoroughly tested, but should work for basic usage. GENERAL_TRANSLATION_PROMPT_PLAIN_TEXT_AND_MD = "You are a translator. Translate the material to English." # Not thoroughly tested, but should work for basic usage.
TRANSLATE_AND_LOCALIZE_STUDY_MATERIAL_PROMPT_PLAIN_TEXT_OR_MD = "You are a translator. Localize and translate the study materials to English. Keep the meaning of the exercise in translation, but it does not need to be literal translation. If there are Finnish names change them to names used in England. Keep every actor the same." TRANSLATE_AND_LOCALIZE_STUDY_MATERIAL_PROMPT_PLAIN_TEXT_OR_MD = "You are a translator. Localize and translate the study materials to English. Keep the meaning of the exercise in translation, but it does not need to be literal translation. If there are Finnish names change them to names used in England. Keep every actor the same."
# Update the language (and the role) in the prompt to suit your needs. # Update the language (and the role) in the prompt to suit your needs.
MULTI_STEP_TRANSLATION_PROMPT = "You are a translator and a software developer. Your native language is Finnish. Below you find text that is translated from English to Finnish. Review the translation. Focus on the fluency and accuracy of the text. First, explain your rationale. Then provide a final version in which you have added the improvements. Put the final version inside a custom tag <FINAL_TRANSLATION>." MULTI_STEP_TRANSLATION_PROMPT = "You are a translator and a software developer. Your native language is Finnish. Below you find text that is translated from English to Finnish. Review the translation. Focus on the fluency and accuracy of the text. First, explain your rationale. Then provide a final version in which you have added the improvements. Adapt the employed imageries and expressions to correspond more accurately with terms utilized in Finnish. Put the final version inside a custom tag <FINAL_TRANSLATION>."
# System prompt for the review step: get_review_of_the_translations sends it as
# the "system" message, and the <BestVersion> tag it asks for is what
# extract_best_version_number parses out of the reply.
REVIEW_TRANSLATIONS_PROMPT ="You are a translator and you speak fluent Finnish and English. Review the following translations. Explain your rationale. Focus on accuracy and fluency of the text. The original text is inside <OriginalText> tag. Finally, vote which is the best version and give its number inside a tag <BestVersion>."
# ------------ SET-UP ------------ # ------------ SET-UP ------------
# Set the initial prompt # Set the initial prompt
...@@ -328,10 +395,20 @@ if file_content: ...@@ -328,10 +395,20 @@ if file_content:
#GET IMPROVED VERSION WITH RATIONALE #GET IMPROVED VERSION WITH RATIONALE
improved_trans_with_rationale = get_improved_translation_for_chunk(chunk, trans) improved_trans_with_rationale = get_improved_translation_for_chunk(chunk, trans)
write_to_file("./debug/chunk" + str(i) + "_improved_translation_with_rationale.md", improved_trans_with_rationale) only_improved_translations = []
for a, review in enumerate(improved_trans_with_rationale):
write_to_file("./debug/chunk" + str(i) + "_improved_translation_with_rationale_choice_"+str(a)+".md", review)
only_improved_translations.append(extract_final_translation(improved_trans_with_rationale[a]))
#write_to_file("./debug/chunk" + str(i) + "_improved_translation_with_rationale.md", improved_trans_with_rationale)
#Review the translations
review_of_translations = get_review_of_the_translations(chunk, only_improved_translations)
write_to_file("./debug/chunk"+str(i)+"_REVIEW.md", review_of_translations)
best_version_num = extract_best_version_number(review_of_translations)
final_translation_of_chunk = only_improved_translations[best_version_num]
#EXTRACT THE FINAL TRANSLATION #EXTRACT THE FINAL TRANSLATION
final_translation_of_chunk = extract_final_translation(improved_trans_with_rationale) #final_translation_of_chunk = extract_final_translation(improved_trans_with_rationale[0])
write_to_file("./debug/chunk"+str(i)+"_final_translation.md", final_translation_of_chunk) write_to_file("./debug/chunk"+str(i)+"_final_translation.md", final_translation_of_chunk)
final_text = final_text + final_translation_of_chunk final_text = final_text + final_translation_of_chunk
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment