diff --git a/fileFormatConverter.py b/fileFormatConverter.py
new file mode 100644
index 0000000..0bb9080
--- /dev/null
+++ b/fileFormatConverter.py
@@ -0,0 +1,45 @@
+"""Convert PDF and DOCX documents into a ``.jsonl`` text dump."""
+import json
+import os
+
+import docx
+import pdfplumber
+
+
+def convert_file(inputFileName, outputFileName):
+    """Extract the text of a PDF or DOCX file and save it as JSON.
+
+    The text is collected as a list of strings (one per PDF page or one per
+    DOCX paragraph) and written as a single JSON array to the file named
+    ``outputFileName + ".jsonl"``.
+
+    Args:
+        inputFileName: Path of the document to read, including its extension.
+        outputFileName: Path of the output file, without the ``.jsonl``
+            extension.
+
+    Raises:
+        ValueError: If the extension is neither ``pdf`` nor ``docx``.
+    """
+    # splitext looks only at the last dot of the final path component, so
+    # names such as "./notes.pdf" or "week.1.docx" are classified correctly.
+    fileType = os.path.splitext(inputFileName)[1].lstrip(".").lower()
+
+    if fileType == "pdf":
+        with pdfplumber.open(inputFileName) as pdf_file:
+            content = [page.extract_text() for page in pdf_file.pages]
+    elif fileType == "docx":
+        # A .docx file is a binary zip archive; hand python-docx the path
+        # instead of a text-mode file handle.
+        doc = docx.Document(inputFileName)
+        content = [para.text for para in doc.paragraphs]
+    else:
+        raise ValueError(
+            f"Unsupported file type {fileType!r}; supported types: pdf, docx"
+        )
+
+    # NOTE(review): the whole list is written as one JSON array on a single
+    # line, not as one record per line; kept as-is for existing consumers.
+    with open(f"{outputFileName}.jsonl", "w", encoding="utf-8") as jsonl_file:
+        jsonl_file.write(json.dumps(content))
+    print(f"{fileType.upper()} content saved to {outputFileName}.jsonl")
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..e2af2ef
--- /dev/null
+++ b/main.py
@@ -0,0 +1,45 @@
+"""Ask GPT-4o to estimate how long a homework document will take."""
+import json
+
+from openai import OpenAI
+
+from config import API_KEY
+from fileFormatConverter import convert_file
+
+client = OpenAI(api_key=API_KEY)
+
+PROMPT = "Give me an estimation how long this homework will take me"
+
+
+def main():
+    """Convert a document, upload it, and stream the model's reply."""
+    inputFileName = input("enter input file name with extension (supported types: pdf, docx): ")
+    outputFileName = input("enter output file name without extension:")
+    convert_file(inputFileName, outputFileName)
+
+    jsonl_path = f"{outputFileName}.jsonl"
+
+    # NOTE(review): the upload is kept from the original flow, but the chat
+    # request below never references the uploaded file; confirm it is needed.
+    with open(jsonl_path, "rb") as file:
+        client.files.create(file=file, purpose="fine-tune")
+
+    # The original request sent only the question; include the extracted
+    # text so the model actually has the homework to estimate.
+    with open(jsonl_path, encoding="utf-8") as file:
+        homework_text = "\n".join(text or "" for text in json.load(file))
+
+    stream = client.chat.completions.create(
+        model="gpt-4o",
+        messages=[{"role": "user", "content": f"{PROMPT}\n\n{homework_text}"}],
+        stream=True,
+    )
+    for chunk in stream:
+        # Guard against a chunk whose `choices` list is empty before indexing.
+        if chunk.choices and chunk.choices[0].delta.content is not None:
+            print(chunk.choices[0].delta.content, end="")
+    print()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..fbacd07
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+openai
+pdfplumber
+python-docx
\ No newline at end of file