OpenAI API, file conversions

This commit is contained in:
Mark Goltsman
2024-10-20 12:20:48 -04:00
parent ec35a72ade
commit 3c4f501316
3 changed files with 60 additions and 0 deletions

31
fileFormatConverter.py Normal file
View File

@@ -0,0 +1,31 @@
import pdfplumber
import json
import json
import docx
def convert_file(inputFileName, outputFileName):
    """Extract the text of a PDF or DOCX file and save it as JSON.

    The text is collected as a list of strings (one entry per PDF page, or
    one entry per DOCX paragraph) and written as a single JSON array to
    ``<outputFileName>.jsonl``.

    Args:
        inputFileName: Path of the file to convert. Its final extension
            selects the reader and must be ``pdf`` or ``docx``
            (case-insensitive).
        outputFileName: Output path without extension; ``.jsonl`` is
            appended.

    Raises:
        ValueError: If the input file's extension is not ``pdf`` or ``docx``.
    """
    import os  # local import: only this function needs os.path

    # Use the final extension only, so names such as "./hw.pdf" or
    # "hw.v2.docx" resolve correctly and a name without a dot does not
    # raise IndexError.
    fileType = os.path.splitext(inputFileName)[1].lstrip('.').lower()

    # Reject unsupported input before any file is opened or created;
    # otherwise `content` would be unbound further down.
    if fileType not in ("pdf", "docx"):
        raise ValueError(
            f"Unsupported file type {fileType!r} for {inputFileName!r}: "
            "expected pdf or docx"
        )

    if fileType == "pdf":
        with pdfplumber.open(inputFileName) as pdf_file:
            # NOTE(review): extract_text() may return None for a page with
            # no text on some pdfplumber versions; normalise to "" so every
            # entry is a string.
            content = [page.extract_text() or "" for page in pdf_file.pages]
    else:
        # A .docx is a binary zip archive, so it must not be opened in text
        # mode; python-docx accepts the path directly.
        doc = docx.Document(inputFileName)
        content = [para.text for para in doc.paragraphs]

    with open(f'{outputFileName}.jsonl', "w", encoding="utf-8") as jsonl_file:
        jsonl_file.write(json.dumps(content))

    # Report the actual source format rather than always saying "PDF".
    print(f"{fileType.upper()} content saved to {outputFileName}.jsonl")
# inputFileName = input("enter input file name with extension (supported types: pdf, docx): ")
# outputFileName = input("enter output file name without extension:")
# convert_file(inputFileName, outputFileName)

26
main.py Normal file
View File

@@ -0,0 +1,26 @@
from config import API_KEY
from openai import OpenAI
from fileFormatConverter import convert_file
# Module-level OpenAI client, authenticated with API_KEY from the local
# config module; main() uses it for every API call.
client = OpenAI(api_key=API_KEY)
def main():
    """Convert a homework file, upload it, and stream a time estimate.

    Prompts for an input file (pdf or docx) and an output base name,
    converts the document to ``<output>.jsonl`` with ``convert_file``,
    uploads that file through ``client.files.create``, and then streams a
    ``gpt-4o`` chat completion to stdout asking how long the homework will
    take. The converted text is included in the request so the model can
    actually see the homework.
    """
    import json  # local import: only needed to read the converted text back

    inputFileName = input("enter input file name with extension (supported types: pdf, docx): ")
    outputFileName = input("enter output file name without extension:")
    convert_file(inputFileName, outputFileName)
    converted_path = f'{outputFileName}.jsonl'

    with open(converted_path, 'rb') as file:
        # NOTE(review): the chat request below does not reference this
        # upload, which is why the document text is also sent inline.
        # Confirm whether the upload is still wanted.
        client.files.create(
            file=file,
            purpose="fine-tune"
        )

    # convert_file stores the document as one JSON array of text sections.
    with open(converted_path, encoding="utf-8") as converted:
        sections = json.load(converted)
    homework_text = "\n".join(text for text in sections if text)

    prompt = "Give me an estimation how long this homework will take me"
    if homework_text:
        prompt = f"{prompt}\n\n{homework_text}"

    stream = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )
    for chunk in stream:
        # A chunk may arrive with an empty choices list; skip it instead of
        # raising IndexError.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if delta_text is not None:
            print(delta_text, end="")
    print()  # finish the streamed answer with a newline


# Run only when executed as a script, so importing this module does not
# prompt for input or call the API.
if __name__ == "__main__":
    main()

3
requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
openai
pdfplumber
python-docx