Skip to content

Instantly share code, notes, and snippets.

@ed-parsadanyan
Created March 28, 2024 17:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ed-parsadanyan/5ef2ecabd4860ac29d605b2317a05eed to your computer and use it in GitHub Desktop.
Save ed-parsadanyan/5ef2ecabd4860ac29d605b2317a05eed to your computer and use it in GitHub Desktop.
Example workflow pass page content to LLM
{
"name": "Example workflow pass page content to LLM",
"nodes": [
{
"parameters": {},
"id": "fd669182-01a3-4b61-87f7-a7ebf0ff71c8",
"name": "On new manual Chat Message",
"type": "@n8n/n8n-nodes-langchain.manualChatTrigger",
"position": [
320,
600
],
"typeVersion": 1,
"disabled": true
},
{
"parameters": {
"model": "gpt-4-1106-preview",
"options": {
"frequencyPenalty": 0.3,
"temperature": 0.7,
"timeout": 300000
}
},
"id": "d5ad979f-fd78-48e2-8ba8-a34405ab2908",
"name": "OpenAI Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
723,
816
],
"typeVersion": 1,
"credentials": {
"openAiApi": {
"id": "rveqdSfp7pCRON1T",
"name": "Ted's Tech Talks OpenAi"
}
}
},
{
"parameters": {
"url": "={{ encodeURI($json.query.url) }}",
"options": {
"allowUnauthorizedCerts": true,
"response": {
"response": {
"neverError": true
}
}
}
},
"id": "be51d3a9-c3e4-4bfa-a484-fbfea3247d23",
"name": "HTTP Request",
"type": "n8n-nodes-base.httpRequest",
"position": [
1100,
1120
],
"typeVersion": 4.1,
"alwaysOutputData": false,
"onError": "continueRegularOutput"
},
{
"parameters": {
"fields": {
"values": [
{
"name": "HTML",
"stringValue": "={{ $json?.data.match(/<body[^>]*>([\\s\\S]*?)<\\/body>/i)[1] }}"
}
]
},
"include": "selected",
"includeFields": "HTML",
"options": {}
},
"id": "5082b0c8-323a-4f24-b8d7-75549799a635",
"name": "Exctract HTML Body",
"type": "n8n-nodes-base.set",
"position": [
1520,
1400
],
"typeVersion": 3.2
},
{
"parameters": {
"conditions": {
"boolean": [
{
"value1": "={{ $json.hasOwnProperty('error') }}",
"value2": true
}
]
}
},
"id": "87bd5f7f-0ebf-432a-9fd2-72683d1ed96f",
"name": "Is error?",
"type": "n8n-nodes-base.if",
"position": [
1300,
1120
],
"typeVersion": 1
},
{
"parameters": {
"fields": {
"values": [
{
"name": "page_content",
"stringValue": "={{ $('QUERY_PARAMS').first()?.json?.query?.url == null ? \"INVALID action_input. This should be an HTTP query string like this: \\\"?url=VALIDURL&method=SELECTEDMETHOD\\\". Only a simple string value is accepted. JSON object as an action_input is NOT supported!\" : JSON.stringify($json.error) }}"
}
]
},
"include": "selected",
"includeFields": "HTML",
"options": {}
},
"id": "8db629f0-7d79-41d8-8a7c-70582653fefe",
"name": "Stringify error message",
"type": "n8n-nodes-base.set",
"position": [
1520,
900
],
"typeVersion": 3.2
},
{
"parameters": {},
"id": "2f6fcd32-4051-4f6e-97c2-0c3afdc9777c",
"name": "Execute Workflow Trigger",
"type": "n8n-nodes-base.executeWorkflowTrigger",
"position": [
500,
1120
],
"typeVersion": 1
},
{
"parameters": {
"fields": {
"values": [
{
"name": "HTML",
"stringValue": "={{ ($json.HTML || \"HTML BODY CONTENT FOR THIS SEARCH RESULT IS NOT AVAILABLE\").replace(/<script[^>]*>([\\s\\S]*?)<\\/script>|<style[^>]*>([\\s\\S]*?)<\\/style>|<noscript[^>]*>([\\s\\S]*?)<\\/noscript>|<!--[\\s\\S]*?-->|<iframe[^>]*>([\\s\\S]*?)<\\/iframe>|<object[^>]*>([\\s\\S]*?)<\\/object>|<embed[^>]*>([\\s\\S]*?)<\\/embed>|<video[^>]*>([\\s\\S]*?)<\\/video>|<audio[^>]*>([\\s\\S]*?)<\\/audio>|<svg[^>]*>([\\s\\S]*?)<\\/svg>/ig, '')}}"
}
]
},
"options": {}
},
"id": "8325de71-7fb8-4382-b446-9fcd40541f01",
"name": "Remove extra tags",
"type": "n8n-nodes-base.set",
"position": [
1720,
1400
],
"typeVersion": 3.2
},
{
"parameters": {
"fields": {
"values": [
{
"name": "HTML",
"stringValue": "={{ $json.HTML.replace(/href\\s*=\\s*\"(.+?)\"/gi, 'href=\"NOURL\"').replace(/src\\s*=\\s*\"(.+?)\"/gi, 'src=\"NOIMG\"')}}"
}
]
},
"options": {}
},
"id": "c90498c1-2c7a-41ee-8e59-36d7b60d1bf5",
"name": "Simplify output",
"type": "n8n-nodes-base.set",
"position": [
2100,
1300
],
"notesInFlow": true,
"typeVersion": 3.2,
"notes": "remove links and image URLs"
},
{
"parameters": {
"conditions": {
"string": [
{
"value1": "={{ $('CONFIG').first()?.json?.query?.method }}",
"operation": "contains",
"value2": "simplif"
}
]
}
},
"id": "62478eb8-6a57-4240-a13f-b82aa57bc39c",
"name": "Simplify?",
"type": "n8n-nodes-base.if",
"position": [
1920,
1400
],
"typeVersion": 1
},
{
"parameters": {
"fields": {
"values": [
{
"name": "query",
"type": "objectValue",
"objectValue": "={{ $json.query.substring($json.query.indexOf('?') + 1).split('&').reduce((result, item) => (result[item.split('=')[0]] = decodeURIComponent(item.split('=')[1]), result), {}) }}"
}
]
},
"options": {}
},
"id": "99798666-3415-40ea-9bca-f98b3de067aa",
"name": "QUERY_PARAMS",
"type": "n8n-nodes-base.set",
"position": [
700,
1120
],
"typeVersion": 3.2
},
{
"parameters": {
"fields": {
"values": [
{
"name": "query.maxlimit",
"type": "numberValue",
"numberValue": "={{ $json?.query?.maxlimit == null ? 70000 : Number($json?.query?.maxlimit) }}"
}
]
},
"options": {}
},
"id": "ee559654-4a0a-4fdd-9cfe-99a8f307ce7a",
"name": "CONFIG",
"type": "n8n-nodes-base.set",
"position": [
900,
1120
],
"typeVersion": 3.2
},
{
"parameters": {
"content": "### Convert the query string into JSON, then set the page length limit (maxlimit, 70000 by default)",
"height": 235.79999999999995,
"width": 556.25
},
"id": "47750987-601e-436c-bf89-1e35219f3c94",
"name": "Sticky Note",
"type": "n8n-nodes-base.stickyNote",
"position": [
461,
1054
],
"typeVersion": 1
},
{
"parameters": {
"content": "## Send an error message:\n1. If the query parameter was incorrect, return the usage instruction. The AI Agent should pick up on this and adapt the query on the next iteration.\n2. If the query is OK but an error occurred during the HTTP Request, send back the original error message.",
"height": 285.7,
"width": 491
},
"id": "84577ffc-9ae1-4ddf-9cc0-2459a0c9301f",
"name": "Sticky Note1",
"type": "n8n-nodes-base.stickyNote",
"position": [
1460,
760
],
"typeVersion": 1
},
{
"parameters": {
"content": "## Post-processing of the HTML page:\n1. Keep only the <BODY> content\n2. Remove inline <SCRIPT> tags entirely, as well as: STYLE, NOSCRIPT, IFRAME, OBJECT, EMBED, VIDEO, AUDIO, SVG, and HTML comments.\n3. If the query parameter is method=simplified, replace all page URLs (a href) and IMG (src) with NOURL / NOIMG - this may save up to 20% of the page length\n4. Convert the remaining HTML to Markdown. This step further reduces the length of the page: long HTML tags and styles are eliminated, but the Markdown syntax keeps some page structure. This gives much better results compared to just plain text.\n5. Finally, check the page length. If it's too long, send an \"ERROR: PAGE CONTENT TOO LONG\" message instead of the actual page. Of course, you could split the page content into chunks, but sometimes long pages just don't contain the needed content, so it makes little sense to burn tokens on them.",
"height": 472.5,
"width": 1200
},
"id": "124eb202-736e-4161-8ff8-c68661233767",
"name": "Sticky Note2",
"type": "n8n-nodes-base.stickyNote",
"position": [
1460,
1080
],
"typeVersion": 1
},
{
"parameters": {
"content": "## Example ReAct AI Agent\n1. The agent prompt is the default one\n2. Check the description of the HTTP_Request_Tool: it guides the agent to provide a query string with several parameters instead of a JSON object",
"height": 483.0226244343891,
"width": 616.8597285067872
},
"id": "7b1f3152-f3f1-4add-b340-428340064135",
"name": "Sticky Note3",
"type": "n8n-nodes-base.stickyNote",
"position": [
460,
460
],
"typeVersion": 1
},
{
"parameters": {
"html": "={{ $json.HTML }}",
"destinationKey": "page_content",
"options": {}
},
"id": "30648d01-c43f-4554-a359-096392640ede",
"name": "Convert to Markdown",
"type": "n8n-nodes-base.markdown",
"position": [
2280,
1400
],
"typeVersion": 1
},
{
"parameters": {
"fields": {
"values": [
{
"name": "page_content",
"stringValue": "={{ $json.page_content.length < $('CONFIG').first()?.json?.query?.maxlimit ? $json.page_content : \"ERROR: PAGE CONTENT TOO LONG\" }}"
},
{
"name": "page_length",
"type": "numberValue",
"numberValue": "={{ $json.page_content.length }}"
}
]
},
"include": "selected",
"options": {}
},
"id": "2dc671a3-bf8f-453f-af95-8f93a42ed953",
"name": "Send Page Content",
"type": "n8n-nodes-base.set",
"position": [
2480,
1400
],
"typeVersion": 3.2
},
{
"parameters": {
"name": "HTTP_Request_Tool",
"description": "Call this tool to fetch a webpage content. The input should be a stringified HTTP query parameter like this: \"?url=VALIDURL&method=SELECTEDMETHOD\". \"url\" parameter should contain the valid URL string. \"method\" key can be either \"full\" or \"simplified\". method=full will fetch the whole webpage content in the Markdown format, including page links and image links. method=simplified will return the Markdown content of the page but remove urls and image links from the page content for simplicity. Before calling this tool, think strategically which \"method\" to call. Best of all to use method=simplified. However, if you anticipate that the page request is not final or if you need to extract links from the page, pick method=full.",
"workflowId": "={{ $workflow.id }}",
"responsePropertyName": "page_content"
},
"id": "3a131428-721b-46bc-bb8e-af3691712d71",
"name": "HTTP_Request_Tool",
"type": "@n8n/n8n-nodes-langchain.toolWorkflow",
"position": [
843,
816
],
"typeVersion": 1
},
{
"parameters": {},
"id": "cf6b7363-6d86-4409-81da-dee31c570337",
"name": "When clicking \"Test workflow\"",
"type": "n8n-nodes-base.manualTrigger",
"typeVersion": 1,
"position": [
-340,
860
]
},
{
"parameters": {
"workflowId": "={{ $workflow.id }}",
"options": {}
},
"id": "4e36edd3-5ead-4078-b9b5-cce1c5a13d24",
"name": "Execute Workflow",
"type": "n8n-nodes-base.executeWorkflow",
"typeVersion": 1,
"position": [
80,
860
]
},
{
"parameters": {
"assignments": {
"assignments": [
{
"id": "e70206a7-b109-470e-b0bd-0baa801e50f1",
"name": "query",
"value": "?url=https://toronto.ctvnews.ca/&method=simplified",
"type": "string"
}
]
},
"options": {}
},
"id": "1f11a02a-5926-4d01-9283-24642db4835b",
"name": "Edit Fields",
"type": "n8n-nodes-base.set",
"typeVersion": 3.3,
"position": [
-120,
860
]
},
{
"parameters": {
"assignments": {
"assignments": [
{
"id": "0883584e-a58d-4fc5-a4f2-fb7cf768803a",
"name": "input",
"value": "=Here are some events from a Toronto news website. Please summarize:\n```\n{{ $json.page_content }}\n```",
"type": "string"
}
]
},
"options": {}
},
"id": "bdabf707-807c-40fe-9638-2e8829c74a9b",
"name": "Edit Fields1",
"type": "n8n-nodes-base.set",
"typeVersion": 3.3,
"position": [
300,
860
]
},
{
"parameters": {
"agent": "reActAgent",
"options": {
"humanMessageTemplate": "{input}\n\n{agent_scratchpad}",
"prefix": "Answer the following questions as best you can. You have access to the following tools:",
"suffixChat": "Begin! Reminder to always use the exact characters `Final Answer` when responding.",
"suffix": "Begin!\n\n\tQuestion: {input}\n\tThought:{agent_scratchpad}"
}
},
"id": "916e9350-5010-4089-b88e-703fce92e883",
"name": "ReAct AI Agent",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
723,
596
],
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"CONFIG": {
"main": [
[
{
"node": "HTTP Request",
"type": "main",
"index": 0
}
]
]
},
"Is error?": {
"main": [
[
{
"node": "Stringify error message",
"type": "main",
"index": 0
}
],
[
{
"node": "Exctract HTML Body",
"type": "main",
"index": 0
}
]
]
},
"Simplify?": {
"main": [
[
{
"node": "Simplify output",
"type": "main",
"index": 0
}
],
[
{
"node": "Convert to Markdown",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request": {
"main": [
[
{
"node": "Is error?",
"type": "main",
"index": 0
}
]
]
},
"QUERY_PARAMS": {
"main": [
[
{
"node": "CONFIG",
"type": "main",
"index": 0
}
]
]
},
"Simplify output": {
"main": [
[
{
"node": "Convert to Markdown",
"type": "main",
"index": 0
}
]
]
},
"HTTP_Request_Tool": {
"ai_tool": [
[
{
"node": "ReAct AI Agent",
"type": "ai_tool",
"index": 0
}
]
]
},
"OpenAI Chat Model": {
"ai_languageModel": [
[
{
"node": "ReAct AI Agent",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Remove extra tags": {
"main": [
[
{
"node": "Simplify?",
"type": "main",
"index": 0
}
]
]
},
"Exctract HTML Body": {
"main": [
[
{
"node": "Remove extra tags",
"type": "main",
"index": 0
}
]
]
},
"Convert to Markdown": {
"main": [
[
{
"node": "Send Page Content",
"type": "main",
"index": 0
}
]
]
},
"Execute Workflow Trigger": {
"main": [
[
{
"node": "QUERY_PARAMS",
"type": "main",
"index": 0
}
]
]
},
"When clicking \"Test workflow\"": {
"main": [
[
{
"node": "Edit Fields",
"type": "main",
"index": 0
}
]
]
},
"Edit Fields": {
"main": [
[
{
"node": "Execute Workflow",
"type": "main",
"index": 0
}
]
]
},
"Execute Workflow": {
"main": [
[
{
"node": "Edit Fields1",
"type": "main",
"index": 0
}
]
]
},
"Edit Fields1": {
"main": [
[
{
"node": "ReAct AI Agent",
"type": "main",
"index": 0
}
]
]
}
},
"active": false,
"settings": {
"executionOrder": "v1",
"saveDataSuccessExecution": "all",
"saveManualExecutions": true,
"callerPolicy": "workflowsFromSameOwner"
},
"versionId": "022cb6de-57a3-4c44-99a2-9c886aa9e2b8",
"meta": {
"templateCredsSetupCompleted": true,
"instanceId": "fb924c73af8f703905bc09c9ee8076f48c17b596ed05b18c0ff86915ef8a7c4a"
},
"id": "lMxIo7KfZ95wGrp8",
"tags": []
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment