OpenAI API, file conversions

2025-08-03 15:20:38 +00:00 · 2024-10-20 12:20:48 -04:00
parent ec35a72ade
commit 3c4f501316
3 changed files with 60 additions and 0 deletions
--- a/fileFormatConverter.py
+++ b/fileFormatConverter.py
@@ -0,0 +1,31 @@
 import pdfplumber
 import json
 import json
 import docx
 def convert_file(inputFileName, outputFileName):
    """Extract the text of a PDF or DOCX file and save it as JSON.

    The text is collected as a list (one entry per PDF page or per DOCX
    paragraph) and written as a single JSON array to
    ``<outputFileName>.jsonl``.

    Args:
        inputFileName: Path of the file to convert. The file type is taken
            from the text after the last dot, case-insensitively.
        outputFileName: Output path without extension; ``.jsonl`` is
            appended.

    Raises:
        ValueError: If the input file is not a ``pdf`` or ``docx`` file.
    """
    # Use the text after the LAST dot so names such as "hw.week1.pdf" or
    # "./hw.pdf" resolve correctly; a name without any dot has no type.
    _, dot, extension = inputFileName.rpartition(".")
    fileType = extension.lower() if dot else ""

    if fileType == "pdf":
        with pdfplumber.open(inputFileName) as pdf_file:
            content = [page.extract_text() for page in pdf_file.pages]
    elif fileType == "docx":
        # A .docx file is a zip archive, so it cannot be read through a
        # text-mode handle; let python-docx open the path itself.
        doc = docx.Document(inputFileName)
        content = [para.text for para in doc.paragraphs]
    else:
        # Fail before touching the output file instead of reaching the
        # write step with `content` undefined.
        raise ValueError(
            f"unsupported file type for {inputFileName!r} "
            "(supported types: pdf, docx)"
        )

    with open(f'{outputFileName}.jsonl', "w") as jsonl_file:
        jsonl_file.write(json.dumps(content))
    # Report the type that was actually converted, not always "PDF".
    print(f"{fileType.upper()} content saved to {outputFileName}.jsonl")
 # inputFileName = input("enter input file name with extension (supported types: pdf, docx): ")
 # outputFileName = input("enter output file name without extension:")
 # convert_file(inputFileName, outputFileName)
--- a/main.py
+++ b/main.py
@@ -0,0 +1,26 @@
 from config import API_KEY
 from openai import OpenAI
 from fileFormatConverter import convert_file
 # Module-level OpenAI client, created once at import time and
 # authenticated with the API_KEY imported from config.
 client = OpenAI(api_key=API_KEY)
 def main():
    """Convert a homework file, upload it, and stream a time estimate.

    Prompts for the input and output file names, converts the input with
    ``convert_file``, uploads the resulting ``.jsonl`` file through the
    client's files endpoint, then streams a chat completion to standard
    output.
    """
    inputFileName = input("enter input file name with extension (supported types: pdf, docx): ")
    outputFileName = input("enter output file name without extension:")
    convert_file(inputFileName, outputFileName)

    # Only the upload needs the file handle; release it before the
    # streaming request starts instead of holding it open throughout.
    with open(f'{outputFileName}.jsonl', 'rb') as file:
        client.files.create(
            file=file,
            purpose="fine-tune"
        )

    # NOTE(review): the chat request below does not reference the uploaded
    # file or the converted text, so the model only sees this fixed prompt.
    # Confirm how the homework content is meant to reach the model.
    stream = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Give me an estimation how long this homework will take me"}],
        stream=True,
    )
    for chunk in stream:
        # Skip chunks that carry no choices before indexing into them.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece is not None:
            # Flush so the text appears as it arrives rather than when the
            # output buffer happens to fill.
            print(piece, end="", flush=True)
    # Terminate the streamed line so the shell prompt starts on a new line.
    print()
 # Run the interactive flow only when executed as a script, so importing
 # this module does not prompt for input or call the API.
 if __name__ == "__main__":
    main()
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,3 @@
 openai
 pdfplumber
 python-docx