Skip to content
Snippets Groups Projects
Commit 87529cb3 authored by Juuso Rytilahti's avatar Juuso Rytilahti
Browse files

Multistep_ugly_implementation

parent ddfc0be0
No related branches found
No related tags found
No related merge requests found
# General imports
import os
import re
import openai
import copy
# Similarity imports
......@@ -61,6 +62,50 @@ def get_translation_for_chunk(chunk,i, temperature=1, previous_messages=None):
return chat_completion['choices'][0]['message']['content']
else:
return None
# return most recent chatGPT answer.
def get_improved_translation_for_chunk(chunk, translation, temperature=1, previous_messages=None):
    """
    Ask the model to review an existing translation and produce an improved one.

    Args:
        chunk: Original (source-language) text.
        translation: The already-translated version of ``chunk``.
        temperature: Sampling temperature forwarded to the chat completion API.
        previous_messages: Accepted for interface compatibility but currently
            unused — few-shot priming with earlier messages is disabled.

    Returns:
        The model's full answer (its rationale plus the improved translation,
        which the prompt asks to be wrapped in <FINAL_TRANSLATION> tags), or
        ``None`` when the API response contains no choices.
    """
    openai.api_key = os.getenv('API_KEY')
    # TODO: make the language pair configurable instead of hard-coding
    # English -> Finnish here (and in MULTI_STEP_TRANSLATION_PROMPT).
    message = "English: \n" + chunk + "\n\n" + "Finnish:\n" + translation
    all_messages = [
        # System prompt instructs the model to act as a translation reviewer.
        {"role": "system", "content": MULTI_STEP_TRANSLATION_PROMPT},
        # The user message carries the source text and the current translation.
        {"role": "user", "content": message},
    ]
    print("API HAS BEEN CALLED!")
    chat_completion = openai.ChatCompletion.create(
        model=GPT_MODEL,
        messages=all_messages,
        temperature=temperature,
        # TODO: test with top_p=0.8
        top_p=1,  # default top_p so repeated calls yield slightly different versions
    )
    if 'choices' in chat_completion and chat_completion['choices']:
        return chat_completion['choices'][0]['message']['content']
    return None
def create_messages(prompt, serverAnswer):
return [
......@@ -199,13 +244,36 @@ def are_texts_similar(text1, text2, threshold=0.987):
print("similarity is: " + similarity.astype(str))
return similarity > threshold
# NOTE(review): this English-target prompt is re-assigned further down in the
# file with a Finnish-target version, so this value is effectively dead —
# confirm which target language is intended and keep only one definition.
LATEX_GENERAL_TRANSLATION_PROMPT = "You are a translator. Translate material in the latex file to English. Don't translate the comments. Do not alter the latex syntax, even if you deem it, for example, to miss some elements."
def extract_final_translation(text):
    """
    Pull out the text wrapped in a <FINAL_TRANSLATION>...</FINAL_TRANSLATION> pair.

    Args:
        text (str): String that may contain a <FINAL_TRANSLATION> block.

    Returns:
        str: Contents of the first tag pair found, or '' when there is none.
    """
    # DOTALL lets '.' span newlines so multi-line translations are captured;
    # the non-greedy group stops at the first closing tag.
    found = re.search(r'<FINAL_TRANSLATION>(.*?)</FINAL_TRANSLATION>', text, re.DOTALL)
    if found:
        return found.group(1)
    return ''
# LaTeX source -> Finnish, preserving LaTeX syntax and comments.
# NOTE(review): this shadows an earlier English-target definition of the same
# constant — confirm which target language is intended and remove the other.
LATEX_GENERAL_TRANSLATION_PROMPT = "You are a translator. Translate material in the latex file to Finnish. Don't translate the comments. Do not alter the latex syntax, even if you deem it, for example, to miss some elements."
# Plain text / Markdown -> English, literal translation.
GENERAL_TRANSLATION_PROMPT_PLAIN_TEXT_AND_MD = "You are a translator. Translate the material to English." # Not thoroughly tested, but should work for basic usage.
# Plain text / Markdown -> English with localization (e.g. Finnish names
# swapped for English ones) rather than a strictly literal translation.
TRANSLATE_AND_LOCALIZE_STUDY_MATERIAL_PROMPT_PLAIN_TEXT_OR_MD = "You are a translator. Localize and translate the study materials to English. Keep the meaning of the exercise in translation, but it does not need to be literal translation. If there are Finnish names change them to names used in England. Keep every actor the same."
# Review-pass prompt: the model critiques an existing translation and wraps
# its final version in <FINAL_TRANSLATION> tags (parsed by
# extract_final_translation). Update the language (and the role) in the
# prompt to suit your needs.
MULTI_STEP_TRANSLATION_PROMPT = "You are a translator and a software developer. Your native language is Finnish. Below you find text that is translated from English to Finnish. Review the translation. Focus on the fluency and accuracy of the text. First, explain your rationale. Then provide a final version in which you have added the improvements. Put the final version inside a custom tag <FINAL_TRANSLATION>."
# ------------ SET-UP ------------
# Select the prompt used for the first translation pass.
# (A dead `INITIAL_PROMPT = ""` assignment was removed here: it was
# immediately overwritten by the line below and never read.)
INITIAL_PROMPT = LATEX_GENERAL_TRANSLATION_PROMPT

# Load BERT tokenizer and model, presumably used by the similarity check
# (are_texts_similar) — downloads weights on first run.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')
......@@ -231,6 +299,7 @@ if file_content:
final_text = ""
previous_messages = None
print("input.md has been broken down to "+str(len(chunks)) + " chunks.")
if(False):
for i, chunk in enumerate(chunks):
print(" ")
print(" ")
......@@ -256,6 +325,21 @@ if file_content:
# In case the translation fails to an error when only part of the translation is done
# write the currently translated text also to the output.md
write_to_file("output.md", final_text)
# Create the better versions
if(False):
for i, chunk in enumerate(chunks):
#TODO add error management The translations should be gained through better way
translation = read_from_file('./debug/chunk'+str(i)+'_translation.md')
improved_trans = get_improved_translation_for_chunk(chunk, translation)
write_to_file("./debug/chunk" + str(i) + "_improved_translation.md", improved_trans)
final_text = final_text + improved_trans
#Parse the final_text
for i, chunk in enumerate(chunks):
final_translation = read_from_file('./debug/chunk'+str(i)+'_improved_translation.md')
final_translation = extract_final_translation(final_translation)
write_to_file("./debug/chunk" + str(i) + "_final_translation.md", final_translation)
final_text = final_text+final_translation
print(" ")
print(" ")
write_to_file("output.md", final_text)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment