baydakovss/openai_translate.py

## openai_translate.py
import openai
import nltk

nltk.download('punkt')

openai.api_key = "XXXXXXXX"

def split_sentences(text, max_length):
    """
    splitting text into chunks by sentences, not exceeding a specified maximum .
    """
    tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
    sentences = tokenizer.tokenize(text)
    result = []
    current_chunk = ""


    for sentence in sentences:
        if len(current_chunk) + len(sentence) < max_length:
            current_chunk += sentence + " "
        else:
            result.append(current_chunk.strip())
            current_chunk = sentence + " "
    if current_chunk:
        result.append(current_chunk.strip())
    return result

def translate_file(input_file, output_file, source_language, target_language, max_chunk_size):
    with open(input_file, "r") as f:
        text = f.read()

    chunks = split_sentences(text, max_chunk_size)

    with open(output_file, "w") as f:
        for chunk in chunks:
            chunk = chunk.replace('^M', '\n')
            chunk = chunk.replace('\n', '\\n')

            prompt = f"Translate the following from {source_language} to {target_language}: {chunk}"
            print(prompt + "\n")

            result = openai.ChatCompletion.create(
                #engine="text-davinci-003",
                engine="gpt-3.5-turbo",
                #prompt=chunk,
                prompt=prompt,
                #max_tokens=2000,
                max_tokens=1024,
                temperature=0.5,
                n=1,
                stop=None,
                #frequency_penalty=0,
                #presence_penalty=0,
                #timeout=60,
                )
            translation = result.choices[0].text.strip()
            print(result.choices[0].text.strip())
            f.write(translation + "\n")

input_file = "input.txt"
output_file = "output.txt"
source_language = "English"
target_language = "Russian"
max_chunk_size = 500

translate_file(input_file, output_file, source_language, target_language, max_chunk_size)
	import openai
	import nltk

	nltk.download('punkt')

	openai.api_key = "XXXXXXXX"

	def split_sentences(text, max_length):
	"""
	splitting text into chunks by sentences, not exceeding a specified maximum .
	"""
	tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
	sentences = tokenizer.tokenize(text)
	result = []
	current_chunk = ""


	for sentence in sentences:
	if len(current_chunk) + len(sentence) < max_length:
	current_chunk += sentence + " "
	else:
	result.append(current_chunk.strip())
	current_chunk = sentence + " "
	if current_chunk:
	result.append(current_chunk.strip())
	return result

	def translate_file(input_file, output_file, source_language, target_language, max_chunk_size):
	with open(input_file, "r") as f:
	text = f.read()

	chunks = split_sentences(text, max_chunk_size)

	with open(output_file, "w") as f:
	for chunk in chunks:
	chunk = chunk.replace('^M', '\n')
	chunk = chunk.replace('\n', '\\n')

	prompt = f"Translate the following from {source_language} to {target_language}: {chunk}"
	print(prompt + "\n")

	result = openai.ChatCompletion.create(
	#engine="text-davinci-003",
	engine="gpt-3.5-turbo",
	#prompt=chunk,
	prompt=prompt,
	#max_tokens=2000,
	max_tokens=1024,
	temperature=0.5,
	n=1,
	stop=None,
	#frequency_penalty=0,
	#presence_penalty=0,
	#timeout=60,
	)
	translation = result.choices[0].text.strip()
	print(result.choices[0].text.strip())
	f.write(translation + "\n")

	input_file = "input.txt"
	output_file = "output.txt"
	source_language = "English"
	target_language = "Russian"
	max_chunk_size = 500

	translate_file(input_file, output_file, source_language, target_language, max_chunk_size)