Skip to content

Instantly share code, notes, and snippets.

@sorphwer
Last active October 15, 2024 08:50
Show Gist options
  • Select an option

  • Save sorphwer/7dc0558cd88af0c15b113b29acdb96c9 to your computer and use it in GitHub Desktop.

Select an option

Save sorphwer/7dc0558cd88af0c15b113b29acdb96c9 to your computer and use it in GitHub Desktop.
Writing Supervisor with Dify API
# Main script: send every non-empty paragraph of the input document to Dify and
# write the reply back into the paragraph as tracked changes, plus a comment
# that carries the model's explanation.
doc = docx.Document(INPUT_FILEPATH)
w_list = []
comments = ''
conversation_id = None  # rebound by dify_request() through `global`
#main loop
for p in doc.paragraphs:
    p_text = p.text
    if not p_text:
        continue
    res = None  # bound before `try` so the except handler can always read it
    try:
        #call dify
        res = dify_request(p_text)
        if not isinstance(res, str):
            # dify_request returns the response object itself on a non-200
            # status; report the raw body because it may not be valid JSON.
            print(f'ERROR: API Request got {res.status_code}: {res.text}')
            continue
        d = extract_reply(res)#extract text from LLM's reply
        if not d:
            print('ERROR: Bad reply format')
            continue
        w_list = html_to_worklist(d['stemm'])#gen worklist for docx operation
        print(w_list)
        if not w_list:
            # Nothing usable came back: keep the paragraph instead of wiping it.
            print('ERROR: Bad reply format')
            continue
        comments = d['Explanation']#comment text
        #docx operation -- only reached with a freshly parsed reply, so a failed
        #request can never clear the paragraph or replay the previous one's edits
        p.clear()
        # Blank run that anchors the first inserted element. `anchor` always
        # follows the most recently added element: `p.runs` does not list
        # w:ins / w:del, so anchoring on `p.runs[-1]` would put consecutive
        # revisions in reverse order.
        anchor = p.add_run()._element
        for kind, text in w_list:#create revision
            if kind == 'delete':
                add_del_revision_elements(anchor, escape_xml_special_chars(text))
                anchor = anchor.getnext()
            elif kind == 'insert':
                add_ins_revision_elements(anchor, escape_xml_special_chars(text))
                anchor = anchor.getnext()
            elif text not in ('p', '/p'):
                # Skip only stray tag names; a substring test here would drop
                # every piece of unchanged text containing the letter "p".
                anchor = p.add_run(text)._element
        add_comment(p, comments)#create comments
    except Exception as e:
        print(f'error when processing {p_text}')
        if res is not None:
            print(f'API response: {res}')
        print(e.args)
        print('---')
        print(traceback.format_exc())
doc.save(OUTPUT_FILEPATH)
# import docx
import docx #use bayoo-docx
import random
import typing
import datetime
import lxml
import re
import uuid
import requests
import traceback
from docx.enum.text import WD_COLOR_INDEX
from docx.enum.style import WD_STYLE_TYPE
from lxml import etree
# Document that is read, and the file the revised copy is saved to.
INPUT_FILEPATH = "Proposal-Example-1.docx"
OUTPUT_FILEPATH = "output.docx"
# Author name stamped on every generated revision and comment.
REVISION_AUTHOR = "Writing Supervisor - Version 1.0"
# Bearer token and endpoint used for the Dify request; both are placeholders.
API_KEY = "Your-key-here"
URL = "https://your.site/v1/chat-messages"
# Bypass the proxy pattern of docx; inspired by https://github.com/python-openxml/python-docx/issues/566
def add_del_revision_elements(element: lxml.etree._Element, text: str):
    """Insert a tracked deletion (``w:del``) directly after ``element``.

    Args:
        element: lxml element the revision is placed after, for example
            ``run._element``.
        text: Text shown as deleted. It is interpolated into the XML fragment
            verbatim, so the caller must pass it already XML-escaped.

    Raises:
        lxml.etree.XMLSyntaxError: If ``text`` makes the fragment malformed.
    """
    w_rsidR = random.randint(1, 100000)
    # Escape the author so a name containing &, < or " cannot break the attribute.
    author = escape_xml_special_chars(REVISION_AUTHOR)
    # Timezone-aware replacement for the deprecated utcnow(); same output format.
    date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    w_id = random.randint(1, 100000)
    del_xml = f'''
    <w:del xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" w:author="{author}" w:date="{date}" w:id="{w_id}">
        <w:r w:rsidR="{w_rsidR}">
            <w:delText>{text}</w:delText>
        </w:r>
    </w:del>
    '''
    revision = etree.fromstring(del_xml, parser=etree.XMLParser(ns_clean=True))
    element.addnext(revision)
def add_ins_revision_elements(element: lxml.etree._Element, text: str):
    """Insert a tracked insertion (``w:ins``) directly after ``element``.

    Args:
        element: lxml element the revision is placed after, for example
            ``run._element``.
        text: Text shown as inserted. It is interpolated into the XML fragment
            verbatim, so the caller must pass it already XML-escaped.

    Raises:
        lxml.etree.XMLSyntaxError: If ``text`` makes the fragment malformed.
    """
    w_rsidR = random.randint(1, 100000)
    # Escape the author so a name containing &, < or " cannot break the attribute.
    author = escape_xml_special_chars(REVISION_AUTHOR)
    # Timezone-aware replacement for the deprecated utcnow(); same output format.
    date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    w_id = random.randint(1, 100000)
    ins_xml = f'''
    <w:ins xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" w:author="{author}" w:date="{date}" w:id="{w_id+1}">
        <w:r w:rsidR="{w_rsidR}">
            <w:t>{text}</w:t>
        </w:r>
    </w:ins>
    '''
    revision = etree.fromstring(ins_xml, parser=etree.XMLParser(ns_clean=True))
    element.addnext(revision)
def html_to_worklist(html_content):
    """Convert the model's diff-style HTML into an ordered work list.

    Args:
        html_content: HTML in which a strikethrough ``<span>`` marks removed
            text, a plain green ``<span>`` marks added text, and everything
            outside those spans is unchanged text.

    Returns:
        A list of ``[kind, text]`` pairs in document order, where ``kind`` is
        ``'delete'``, ``'insert'`` or ``'default'`` and ``text`` is stripped of
        surrounding whitespace. Whitespace-only unchanged text is omitted.
    """
    pattern = (
        r'<span style="color:(?:green|red);text-decoration:line-through;">(?P<struck>.*?)</span>'
        r'|<span style="color:green;">(?P<inserted>.*?)</span>'
        # Consume any other tag whole so its name (e.g. "p", "/p") is not
        # mistaken for unchanged text.
        r'|<[^<>]*>'
        r'|(?P<plain>[^<>]+)'
    )
    result = []
    for match in re.finditer(pattern, html_content):
        # Classify by the alternative that actually matched rather than by
        # searching the matched text, so plain text that merely mentions
        # "color:red" stays plain text.
        kind = match.lastgroup
        if kind == 'struck':
            # Every strikethrough span is a deletion; classifying by colour
            # alone turned a green strikethrough into an empty insert.
            result.append(['delete', match.group('struck').strip()])
        elif kind == 'inserted':
            result.append(['insert', match.group('inserted').strip()])
        elif kind == 'plain':
            default_text = match.group('plain').strip()
            if default_text:
                result.append(['default', default_text])
        # kind is None for an unrelated tag: nothing to record.
    return result
def add_comment(paragraph:docx.text.paragraph.Paragraph,comment):
    """Attach ``comment`` to ``paragraph``, authored by REVISION_AUTHOR with initials 'od'."""
    comment_options = {'author': REVISION_AUTHOR, 'initials': 'od'}
    paragraph.add_comment(comment, **comment_options)
def extract_reply(text):
    """Split a reply into its ``<STEMM_Writing>`` block and the text after it.

    Args:
        text: Reply text expected to contain one
            ``<STEMM_Writing>...</STEMM_Writing>`` block.

    Returns:
        ``{'stemm': ..., 'Explanation': ...}`` where ``stemm`` is the stripped
        content of the first block and ``Explanation`` is everything that
        follows that block's closing tag, or ``None`` when ``text`` is not a
        string or contains no such block.
    """
    if not isinstance(text, str):
        return None
    match = re.search(r'<STEMM_Writing>(.*?)</STEMM_Writing>', text, re.DOTALL)
    if match is None:
        return None
    # Slice from the end of the matched block: splitting on the closing tag
    # would cut the explanation short at a second closing tag and pick the
    # wrong piece if a stray closing tag came before the block.
    return {'stemm': match.group(1).strip(), 'Explanation': text[match.end():]}
def dify_request(query, timeout=120):
    """Post ``query`` to the Dify endpoint at ``URL`` and return the answer.

    The module-level ``conversation_id`` is sent along when it is set and is
    updated from every successful response, so consecutive calls share one
    conversation.

    Args:
        query: Text sent as the ``query`` field.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole run.

    Returns:
        The ``answer`` string from the JSON body when the status code is 200;
        otherwise the ``requests.Response`` object itself, so the caller can
        inspect the failure.

    Raises:
        requests.exceptions.RequestException: On connection errors or timeout.
    """
    global conversation_id
    headers = {
        'Authorization': 'Bearer ' + API_KEY,
        'Content-Type': 'application/json',
    }
    data = {
        "inputs": {},
        "query": query,
        "user": 'jupyter-lab',
    }
    if conversation_id:
        data["conversation_id"] = conversation_id
    response = requests.post(URL, headers=headers, json=data, timeout=timeout)
    if response.status_code != 200:
        return response
    payload = response.json()  # parse the body once and reuse it
    conversation_id = payload['conversation_id']
    print('Dify API 200 ok')
    return payload['answer']
def escape_xml_special_chars(xml_string):
    """Return ``xml_string`` with ``&``, ``<``, ``>``, ``"`` and ``'`` replaced by XML entities."""
    # One pass over the string; each character is mapped exactly once, so the
    # ampersands introduced by the entities are never escaped again.
    entity_for = {
        ord("&"): "&amp;",
        ord("<"): "&lt;",
        ord(">"): "&gt;",
        ord('"'): "&quot;",
        ord("'"): "&apos;",
    }
    return xml_string.translate(entity_for)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment