mirror of
https://github.com/tymur999/braintok.git
synced 2025-08-03 15:20:38 +00:00
open ai API, file conversions
This commit is contained in:
31
fileFormatConverter.py
Normal file
31
fileFormatConverter.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
import pdfplumber
|
||||||
|
import json
|
||||||
|
import json
|
||||||
|
import docx
|
||||||
|
|
||||||
|
|
||||||
|
def convert_file(inputFileName, outputFileName):
    """Extract the text of a PDF or DOCX file into ``<outputFileName>.jsonl``.

    The output file holds one JSON array: one entry per PDF page (the value
    returned by ``page.extract_text()``) or one string per DOCX paragraph.

    Args:
        inputFileName: Path of the file to convert. The text after its last
            ``.`` selects the format (``pdf`` or ``docx``, case-insensitive).
        outputFileName: Output path without extension; ``.jsonl`` is appended.

    Raises:
        ValueError: If the input name has no extension or an unsupported one.
    """
    # Take the text after the LAST dot so names such as "hw.final.pdf" or
    # "./docs/hw.pdf" resolve correctly, and a name without any dot is
    # rejected cleanly instead of raising IndexError.
    _, dot, extension = inputFileName.rpartition('.')
    fileType = extension.lower()

    # Validate before touching the filesystem so an unsupported input never
    # leaves an empty output file behind.
    if not dot or fileType not in ("pdf", "docx"):
        raise ValueError(
            f"unsupported input file {inputFileName!r}: "
            "expected a .pdf or .docx extension"
        )

    if fileType == "pdf":
        with pdfplumber.open(inputFileName) as pdf_file:
            content = [page.extract_text() for page in pdf_file.pages]
    else:
        # A .docx is a zip archive; give python-docx the path and let it open
        # the file itself rather than passing a text-mode handle.
        doc = docx.Document(inputFileName)
        content = [para.text for para in doc.paragraphs]

    with open(f'{outputFileName}.jsonl', "w") as jsonl_file:
        jsonl_file.write(json.dumps(content))

    # Report the format that was actually converted.
    print(f"{fileType.upper()} content saved to {outputFileName}.jsonl")
|
||||||
|
|
||||||
|
|
||||||
|
# inputFileName = input("enter input file name with extension (supported types: pdf, docx): ")
|
||||||
|
# outputFileName = input("enter output file name without extension:")
|
||||||
|
# convert_file(inputFileName, outputFileName)
|
26
main.py
Normal file
26
main.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from config import API_KEY
|
||||||
|
from openai import OpenAI
|
||||||
|
from fileFormatConverter import convert_file
|
||||||
|
# Module-level OpenAI client, authenticated with API_KEY imported from config.
client = OpenAI(api_key=API_KEY)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Prompt for a document, convert it to JSONL, upload it, and print a streamed reply.

    Steps:
      1. Ask for the input file name and the output base name.
      2. Convert the input via ``convert_file``, producing ``<output>.jsonl``.
      3. Upload that file through ``client.files.create`` with purpose
         ``"fine-tune"``.
      4. Send a fixed chat prompt to ``gpt-4o`` and print the streamed text.
    """
    source_name = input("enter input file name with extension (supported types: pdf, docx): ")
    target_stem = input("enter output file name without extension:")
    convert_file(source_name, target_stem)

    # The upload result is not referenced by the chat request below.
    with open(f'{target_stem}.jsonl', 'rb') as upload_handle:
        client.files.create(file=upload_handle, purpose="fine-tune")

    prompt = {
        "role": "user",
        "content": "Give me an estimation how long this homework will take me",
    }
    stream = client.chat.completions.create(
        model="gpt-4o",
        messages=[prompt],
        stream=True,
    )

    # Streamed chunks may carry no text; skip those.
    for chunk in stream:
        piece = chunk.choices[0].delta.content
        if piece is None:
            continue
        print(piece, end="")
|
||||||
|
|
||||||
|
# Run the interactive flow only when this file is executed as a script, so
# importing the module does not trigger prompts or API calls.
if __name__ == "__main__":
    main()
|
3
requirements.txt
Normal file
3
requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
openai
|
||||||
|
pdfplumber
|
||||||
|
python-docx
|
Reference in New Issue
Block a user