Last active
October 15, 2024 08:50
-
-
Save sorphwer/7dc0558cd88af0c15b113b29acdb96c9 to your computer and use it in GitHub Desktop.
Writing Supervisor with Dify API
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| bayoo-docx |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Review every non-empty paragraph of INPUT_FILEPATH with the Dify app and
# rewrite it as Word tracked changes (insertions/deletions) plus a comment
# holding the model's explanation, then save the result to OUTPUT_FILEPATH.
doc = docx.Document(INPUT_FILEPATH)
w_list = []
comments = ''
conversation_id = None  # updated by dify_request() through `global`

# main loop
for p in doc.paragraphs:
    p_text = p.text
    if not p_text:
        continue
    res = None  # bound before the try so the except handler can always read it
    try:
        # call dify
        res = dify_request(p_text)
        if not isinstance(res, str):
            # dify_request() returns the raw response object on a non-200
            # status; leave the paragraph untouched in that case.
            print(f'ERROR: API Request got {res.status_code}: {res.json()}')
            continue
        d = extract_reply(res)  # extract text from LLM's reply
        if not d:
            print('ERROR: Bad reply format')
            continue
        w_list = html_to_worklist(d['stemm'])  # gen worklist for docx operation
        print(w_list)
        comments = d['Explanation']  # comment text
        if not w_list:
            # Nothing usable came back; clearing the paragraph now would
            # simply erase the author's text.
            print('ERROR: Bad reply format')
            continue

        # docx operation: only reached when a fresh worklist exists, so a
        # failed call can never replay the previous paragraph's edits.
        p.clear()
        p.add_run()  # place a blank run so the paragraph has an anchor element
        for action, text in w_list:  # create revision
            # Anchor on the paragraph's current last child (not the last
            # run): revision elements are not runs, so anchoring on the last
            # run would stack consecutive revisions in reverse order.
            if action == 'delete':
                add_del_revision_elements(p._element[-1], escape_xml_special_chars(text))
            elif action == 'insert':
                add_ins_revision_elements(p._element[-1], escape_xml_special_chars(text))
            elif text not in ('p', '/p'):
                # Skip only stray tag-name tokens; a substring test here
                # would drop every segment containing the letter "p".
                p.add_run(text)
        add_comment(p, comments)  # create comments
    except Exception as e:
        print(f'error when processing {p_text}')
        if res is not None:
            print(f'API response: {res}')
        print(e.args)
        print('---')
        print(traceback.format_exc())
doc.save(OUTPUT_FILEPATH)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # import docx | |
| import docx #use bayoo-docx | |
| import random | |
| import typing | |
| import datetime | |
| import lxml | |
| import re | |
| import uuid | |
| import requests | |
| import traceback | |
| from docx.enum.text import WD_COLOR_INDEX | |
| from docx.enum.style import WD_STYLE_TYPE | |
| from lxml import etree | |
| INPUT_FILEPATH = 'Proposal-Example-1.docx' | |
| OUTPUT_FILEPATH = 'output.docx' | |
| REVISION_AUTHOR = 'Writing Supervisor - Version 1.0' | |
| API_KEY = 'Your-key-here' | |
| URL = 'https://your.site/v1/chat-messages' | |
| # Bypass the proxy pattern of docx; inspired by https://github.com/python-openxml/python-docx/issues/566 | |
def add_del_revision_elements(element:lxml.etree._Element,text:str):
    """Insert a tracked-deletion (``w:del``) element directly after ``element``.

    Args:
        element: lxml element used as the anchor, e.g. ``run._element``; the
            new ``w:del`` becomes its next sibling.
        text: Deleted text. It is interpolated into XML markup and parsed, so
            it must already be XML-escaped by the caller.

    Raises:
        lxml.etree.XMLSyntaxError: if ``text`` makes the markup malformed.
    """
    # rsid attributes are eight-digit hex values in WordprocessingML.
    w_rsidR = f'{random.getrandbits(32):08X}'
    author = REVISION_AUTHOR
    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted string is unchanged.
    date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    w_id = random.randint(1, 100000)
    del_xml = f'''
    <w:del xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" w:author="{author}" w:date="{date}" w:id="{w_id}">
        <w:r w:rsidR="{w_rsidR}">
            <w:delText>{text}</w:delText>
        </w:r>
    </w:del>
    '''
    deletion = etree.fromstring(del_xml, parser=etree.XMLParser(ns_clean=True))
    element.addnext(deletion)
def add_ins_revision_elements(element:lxml.etree._Element,text:str):
    """Insert a tracked-insertion (``w:ins``) element directly after ``element``.

    Args:
        element: lxml element used as the anchor, e.g. ``run._element``; the
            new ``w:ins`` becomes its next sibling.
        text: Inserted text. It is interpolated into XML markup and parsed, so
            it must already be XML-escaped by the caller.

    Raises:
        lxml.etree.XMLSyntaxError: if ``text`` makes the markup malformed.
    """
    # rsid attributes are eight-digit hex values in WordprocessingML.
    w_rsidR = f'{random.getrandbits(32):08X}'
    author = REVISION_AUTHOR
    # Timezone-aware replacement for the deprecated datetime.utcnow();
    # the formatted string is unchanged.
    date = datetime.datetime.now(datetime.timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ')
    # Same id range as before (a random 1..100000 value plus one).
    w_id = random.randint(2, 100001)
    ins_xml = f'''
    <w:ins xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" w:author="{author}" w:date="{date}" w:id="{w_id}">
        <w:r w:rsidR="{w_rsidR}">
            <w:t>{text}</w:t>
        </w:r>
    </w:ins>
    '''
    insertion = etree.fromstring(ins_xml, parser=etree.XMLParser(ns_clean=True))
    element.addnext(insertion)
# Tokenizer for the model's HTML diff. Alternatives are tried in order:
# struck-through span (deletion), green span (insertion), any other tag
# (consumed and ignored), then plain text.
_WORKLIST_TOKEN_RE = re.compile(
    r'<span style="color:(?:green|red);text-decoration:line-through;">(?P<deleted>.*?)</span>'
    r'|<span style="color:green;">(?P<inserted>.*?)</span>'
    r'|</?[A-Za-z][^<>]*>'
    r'|(?P<plain>[^<>]+)'
)


def html_to_worklist(html_content):
    """Turn the model's HTML diff into an ordered list of docx operations.

    Args:
        html_content: HTML fragment in which deletions are struck-through
            spans and insertions are green spans.

    Returns:
        A list of ``[action, text]`` pairs in document order, where action is
        ``'delete'``, ``'insert'`` or ``'default'`` (unchanged text). Text is
        stripped of surrounding whitespace; empty default segments are dropped.
    """
    result = []
    for token in _WORKLIST_TOKEN_RE.finditer(html_content):
        deleted = token.group('deleted')
        inserted = token.group('inserted')
        if deleted is not None:
            # Any line-through span is a deletion, whatever its colour;
            # keying on the colour alone lost the text of green ones.
            result.append(['delete', deleted.strip()])
        elif inserted is not None:
            result.append(['insert', inserted.strip()])
        else:
            # `plain` is None when the token was an ordinary tag such as <p>;
            # those are skipped so tag names never leak into the text.
            plain = (token.group('plain') or '').strip()
            if plain:
                result.append(['default', plain])
    return result
def add_comment(paragraph:docx.text.paragraph.Paragraph,comment):
    """Attach ``comment`` to ``paragraph`` as a comment authored by REVISION_AUTHOR.

    Relies on the paragraph object's ``add_comment`` method and always uses
    the initials ``'od'``.
    """
    comment_meta = {'author': REVISION_AUTHOR, 'initials': 'od'}
    paragraph.add_comment(comment, **comment_meta)
def extract_reply(text):
    """Split a model reply into the rewritten passage and its explanation.

    Args:
        text: Raw reply expected to contain a
            ``<STEMM_Writing>...</STEMM_Writing>`` section.

    Returns:
        ``{'stemm': ..., 'Explanation': ...}`` where ``stemm`` is the stripped
        content of the first tagged section and ``Explanation`` is the
        unstripped text between the first closing tag and the next closing tag
        (or the end of the reply). ``None`` when no tagged section is found.
    """
    closing_tag = '</STEMM_Writing>'
    found = re.search(r'<STEMM_Writing>(.*?)</STEMM_Writing>', text, re.DOTALL)
    if found is None:
        return None
    # A closing tag is guaranteed to exist here because the search succeeded.
    after_first_close = text.partition(closing_tag)[2]
    explanation = after_first_close.partition(closing_tag)[0]
    return {'stemm': found.group(1).strip(), 'Explanation': explanation}
def dify_request(query, timeout=120):
    """Send ``query`` to the Dify chat-messages endpoint and return the answer.

    The module-level ``conversation_id`` is sent along when set and is updated
    from every successful response, so consecutive calls share one
    conversation.

    Args:
        query: Text to send as the chat message.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the whole run forever.
            The default is a guess at a generous bound; tune for your model.

    Returns:
        The ``answer`` string on HTTP 200; otherwise the raw response object,
        so the caller can inspect the status code and body.

    Raises:
        requests.exceptions.RequestException: on connection errors/timeouts.
        KeyError: if a 200 response lacks ``conversation_id`` or ``answer``.
    """
    global conversation_id
    headers = {
        'Authorization': 'Bearer ' + API_KEY,
        'Content-Type': 'application/json'
    }
    data = {
        "inputs": {},
        "query": query,
        "user": 'jupyter-lab',
    }
    if conversation_id:
        # Continue the existing conversation instead of starting a new one.
        data["conversation_id"] = conversation_id
    response = requests.post(URL, headers=headers, json=data, timeout=timeout)
    if response.status_code != 200:
        return response
    payload = response.json()  # decode the body once and reuse it
    conversation_id = payload['conversation_id']
    print('Dify API 200 ok')
    return payload['answer']
def escape_xml_special_chars(xml_string):
    """Return ``xml_string`` with the five XML special characters escaped.

    Replaces ``&``, ``<``, ``>``, ``"`` and ``'`` with their predefined XML
    entities so the text can be embedded safely in element content or
    attribute values.
    """
    # '&' is handled first so the ampersands introduced by the other
    # replacements are not escaped a second time.
    return (xml_string.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace('"', "&quot;")
            .replace("'", "&apos;"))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment