Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save marcotcr/9ab4ba0f54d9a87f577adf6c36715b92 to your computer and use it in GitHub Desktop.
Save marcotcr/9ab4ba0f54d9a87f577adf6c36715b92 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import guidance\n",
"gpt4 = guidance.llms.OpenAI(\"gpt-4\")\n",
"chatgpt = guidance.llms.OpenAI(\"gpt-3.5-turbo\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div id=\"guidance-stop-button-ff076ec0-9b74-4f75-8362-092aff8c6d6d\" style=\"cursor: pointer; margin: 0px; display: none; float: right; padding: 3px; border-radius: 4px 4px 4px 4px; border: 0px solid rgba(127, 127, 127, 1); padding-left: 10px; padding-right: 10px; font-size: 13px; background-color: rgba(127, 127, 127, 0.25);\">Stop program</div><div id=\"guidance-content-ff076ec0-9b74-4f75-8362-092aff8c6d6d\"><pre style='margin: 0px; padding: 0px; padding-left: 8px; margin-left: -8px; border-radius: 0px; border-left: 1px solid rgba(127, 127, 127, 0.2); white-space: pre-wrap; font-family: ColfaxAI, Arial; font-size: 15px; line-height: 23px;'><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>system</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'><span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{llm.default_system_prompt}}'>You are a helpful assistant.</span></div></div><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>user</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'>Please solve the equation x^2 + 5.0x - 14.0 = 0</div></div><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>assistant</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'><span style='background-color: rgba(0, 165, 0, 0.25); opacity: 1.0; display: inline;' title='{{gen &#x27;answer&#x27; temperature=0 max_tokens=1200}}'>To solve the equation x^2 + 5.0x - 14.0 = 0, we can use the quadratic formula:\n",
"\n",
"x = (-b ± sqrt(b^2 - 4ac)) / 2a\n",
"\n",
"where a = 1, b = 5.0, and c = -14.0.\n",
"\n",
"Plugging in these values, we get:\n",
"\n",
"x = (-5.0 ± sqrt(5.0^2 - 4(1)(-14.0))) / 2(1)\n",
"x = (-5.0 ± sqrt(89.0)) / 2\n",
"\n",
"So the solutions are:\n",
"\n",
"x = (-5.0 + sqrt(89.0)) / 2 ≈ 1.44\n",
"x = (-5.0 - sqrt(89.0)) / 2 ≈ -6.44\n",
"\n",
"Therefore, the solutions to the equation x^2 + 5.0x - 14.0 = 0 are approximately 1.44 and -6.44.</span></div></div></pre></div>\n",
"<script type=\"text/javascript\">(()=>{var t={296:(t,e,n)=>{var i=NaN,o=\"[object Symbol]\",r=/^\\s+|\\s+$/g,a=/^[-+]0x[0-9a-f]+$/i,s=/^0b[01]+$/i,c=/^0o[0-7]+$/i,d=parseInt,u=\"object\"==typeof n.g&&n.g&&n.g.Object===Object&&n.g,l=\"object\"==typeof self&&self&&self.Object===Object&&self,f=u||l||Function(\"return this\")(),h=Object.prototype.toString,p=Math.max,m=Math.min,g=function(){return f.Date.now()};function b(t){var e=typeof t;return!!t&&(\"object\"==e||\"function\"==e)}function y(t){if(\"number\"==typeof t)return t;if(function(t){return\"symbol\"==typeof t||function(t){return!!t&&\"object\"==typeof t}(t)&&h.call(t)==o}(t))return i;if(b(t)){var e=\"function\"==typeof t.valueOf?t.valueOf():t;t=b(e)?e+\"\":e}if(\"string\"!=typeof t)return 0===t?t:+t;t=t.replace(r,\"\");var n=s.test(t);return n||c.test(t)?d(t.slice(2),n?2:8):a.test(t)?i:+t}t.exports=function(t,e,n){var i,o,r,a,s,c,d=0,u=!1,l=!1,f=!0;if(\"function\"!=typeof t)throw new TypeError(\"Expected a function\");function h(e){var n=i,r=o;return i=o=void 0,d=e,a=t.apply(r,n)}function v(t){var n=t-c;return void 0===c||n>=e||n<0||l&&t-d>=r}function _(){var t=g();if(v(t))return w(t);s=setTimeout(_,function(t){var n=e-(t-c);return l?m(n,r-(t-d)):n}(t))}function w(t){return s=void 0,f&&i?h(t):(i=o=void 0,a)}function j(){var t=g(),n=v(t);if(i=arguments,o=this,c=t,n){if(void 0===s)return function(t){return d=t,s=setTimeout(_,e),u?h(t):a}(c);if(l)return s=setTimeout(_,e),h(c)}return void 0===s&&(s=setTimeout(_,e)),a}return e=y(e)||0,b(n)&&(u=!!n.leading,r=(l=\"maxWait\"in n)?p(y(n.maxWait)||0,e):r,f=\"trailing\"in n?!!n.trailing:f),j.cancel=function(){void 0!==s&&clearTimeout(s),d=0,i=c=o=s=void 0},j.flush=function(){return void 0===s?a:w(g())},j}},777:t=>{var e,n,i=Math.max,o=(e=function(t,e){return function(t,e,n){if(\"function\"!=typeof t)throw new TypeError(\"Expected a function\");return setTimeout((function(){t.apply(void 0,n)}),1)}(t,0,e)},n=i(void 0===n?e.length-1:n,0),function(){for(var t=arguments,o=-1,r=i(t.length-n,0),a=Array(r);++o<r;)a[o]=t[n+o];o=-1;for(var s=Array(n+1);++o<n;)s[o]=t[o];return s[n]=a,function(t,e,n){switch(n.length){case 0:return t.call(e);case 1:return t.call(e,n[0]);case 2:return t.call(e,n[0],n[1]);case 3:return t.call(e,n[0],n[1],n[2])}return t.apply(e,n)}(e,this,s)});t.exports=o}},e={};function n(i){var o=e[i];if(void 0!==o)return o.exports;var r=e[i]={exports:{}};return t[i](r,r.exports,n),r.exports}n.n=t=>{var e=t&&t.__esModule?()=>t.default:()=>t;return n.d(e,{a:e}),e},n.d=(t,e)=>{for(var i in e)n.o(e,i)&&!n.o(t,i)&&Object.defineProperty(t,i,{enumerable:!0,get:e[i]})},n.g=function(){if(\"object\"==typeof globalThis)return globalThis;try{return this||new Function(\"return this\")()}catch(t){if(\"object\"==typeof window)return window}}(),n.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e),(()=>{\"use strict\";const t=t=>{const e=new Set;do{for(const n of Reflect.ownKeys(t))e.add([t,n])}while((t=Reflect.getPrototypeOf(t))&&t!==Object.prototype);return e};function e(e,{include:n,exclude:i}={}){const o=t=>{const e=e=>\"string\"==typeof e?t===e:e.test(t);return n?n.some(e):!i||!i.some(e)};for(const[n,i]of t(e.constructor.prototype)){if(\"constructor\"===i||!o(i))continue;const t=Reflect.getOwnPropertyDescriptor(n,i);t&&\"function\"==typeof t.value&&(e[i]=e[i].bind(e))}return e}var i=n(777),o=n.n(i),r=n(296),a=n.n(r);class s{constructor(t,n){e(this),this.interfaceId=t,this.callbackMap={},this.data={},this.pendingData={},this.jcomm=new c(\"guidance_interface_target_\"+this.interfaceId,this.updateData,\"open\"),this.debouncedSendPendingData500=a()(this.sendPendingData,500),this.debouncedSendPendingData1000=a()(this.sendPendingData,1e3),n&&o()(n)}send(t,e){this.addPendingData(t,e),this.sendPendingData()}sendEvent(t){for(const e of Object.keys(t))this.addPendingData(e,t[e]);this.sendPendingData()}debouncedSendEvent500(t){for(const e of Object.keys(t))this.addPendingData(e,t[e]);this.debouncedSendPendingData500()}debouncedSend500(t,e){this.addPendingData(t,e),this.debouncedSendPendingData500()}debouncedSend1000(t,e){this.addPendingData(t,e),this.debouncedSendPendingData1000()}addPendingData(t,e){Array.isArray(t)||(t=[t]);for(const n in t)this.pendingData[t[n]]=e}updateData(t){t=JSON.parse(t.data);for(const e in t)this.data[e]=t[e];for(const e in t)e in this.callbackMap&&this.callbackMap[e](this.data[e])}subscribe(t,e){this.callbackMap[t]=e,o()((e=>this.callbackMap[t](this.data[t])))}sendPendingData(){this.jcomm.send_data(this.pendingData),this.pendingData={}}}class c{constructor(t,e,n=\"open\"){this._fire_callback=this._fire_callback.bind(this),this._register=this._register.bind(this),this.jcomm=void 0,this.callback=e,void 0!==window.Jupyter?\"register\"===n?Jupyter.notebook.kernel.comm_manager.register_target(t,this._register):(this.jcomm=Jupyter.notebook.kernel.comm_manager.new_comm(t),this.jcomm.on_msg(this._fire_callback)):void 0!==window._mgr&&(\"register\"===n?window._mgr.widgetManager.proxyKernel.registerCommTarget(t,this._register):(this.jcomm=window._mgr.widgetManager.proxyKernel.createComm(t),this.jcomm.open({},\"\"),this.jcomm.onMsg=this._fire_callback))}send_data(t){void 0!==this.jcomm?this.jcomm.send(t):console.error(\"Jupyter comm module not yet loaded! So we can't send the message.\")}_register(t,e){this.jcomm=t,this.jcomm.on_msg(this._fire_callback)}_fire_callback(t){this.callback(t.content.data)}}class d{constructor(t,n){e(this),this.id=t,this.comm=new s(t),this.comm.subscribe(\"append\",this.appendData),this.comm.subscribe(\"replace\",this.replaceData),this.comm.subscribe(\"event\",this.eventOccurred),this.element=document.getElementById(\"guidance-content-\"+t),this.stop_button=document.getElementById(\"guidance-stop-button-\"+t),this.stop_button.onclick=()=>this.comm.send(\"event\",\"stop\")}appendData(t){t&&(this.stop_button.style.display=\"inline-block\",this.element.innerHTML+=t)}replaceData(t){t&&(this.stop_button.style.display=\"inline-block\",this.element.innerHTML=t)}eventOccurred(t){\"complete\"===t&&(this.stop_button.style.display=\"none\")}}window._guidanceDisplay=function(t,e){return new d(t,e)}})()})();; window._guidanceDisplay(\"ff076ec0-9b74-4f75-8362-092aff8c6d6d\");</script>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Roots = 2, -7\n",
"evalz = guidance('''\n",
"{{~#system~}}\n",
"{{llm.default_system_prompt}}\n",
"{{~/system}}\n",
"\n",
"{{#user~}}\n",
"Please solve the equation x^2 + 5.0x - 14.0 = 0\n",
"{{~/user}}\n",
"{{#assistant~}}\n",
"{{gen 'answer' temperature=0 max_tokens=1200}}\n",
"{{~/assistant~}}''')\n",
"chatgpt_answer = evalz(llm=chatgpt)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"ChatGPT(3.5) gets it wrong"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div id=\"guidance-stop-button-dd2c9449-a749-4f99-b62c-f9cd04f1e415\" style=\"cursor: pointer; margin: 0px; display: none; float: right; padding: 3px; border-radius: 4px 4px 4px 4px; border: 0px solid rgba(127, 127, 127, 1); padding-left: 10px; padding-right: 10px; font-size: 13px; background-color: rgba(127, 127, 127, 0.25);\">Stop program</div><div id=\"guidance-content-dd2c9449-a749-4f99-b62c-f9cd04f1e415\"><pre style='margin: 0px; padding: 0px; padding-left: 8px; margin-left: -8px; border-radius: 0px; border-left: 1px solid rgba(127, 127, 127, 0.2); white-space: pre-wrap; font-family: ColfaxAI, Arial; font-size: 15px; line-height: 23px;'><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>system</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'><span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{llm.default_system_prompt}}'>You are a helpful assistant.</span></div></div><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>user</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'>Please solve the equation x^2 + 5.0x - 14.0 = 0</div></div><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>assistant</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'><span style='background-color: rgba(0, 165, 0, 0.25); opacity: 1.0; display: inline;' title='{{gen &#x27;answer&#x27; temperature=0 max_tokens=1200}}'>To solve the quadratic equation x^2 + 5.0x - 14.0 = 0, we can use the quadratic formula:\n",
"\n",
"x = (-b ± √(b²-4ac)) / 2a\n",
"\n",
"In this equation, a = 1, b = 5, and c = -14.\n",
"\n",
"x = (-(5) ± √((5)²-4(1)(-14))) / 2(1)\n",
"\n",
"x = (-5 ± √(25+56)) / 2\n",
"\n",
"x = (-5 ± √81) / 2\n",
"\n",
"x = (-5 ± 9) / 2\n",
"\n",
"Now, we have two possible solutions:\n",
"\n",
"x = (-5 + 9) / 2\n",
"x = 4 / 2\n",
"x = 2\n",
"\n",
"and\n",
"\n",
"x = (-5 - 9) / 2\n",
"x = -14 / 2\n",
"x = -7\n",
"\n",
"So, the solutions for the equation x^2 + 5.0x - 14.0 = 0 are x = 2 and x = -7.</span></div></div></pre></div>\n",
"<script type=\"text/javascript\">(()=>{var t={296:(t,e,n)=>{var i=NaN,o=\"[object Symbol]\",r=/^\\s+|\\s+$/g,a=/^[-+]0x[0-9a-f]+$/i,s=/^0b[01]+$/i,c=/^0o[0-7]+$/i,d=parseInt,u=\"object\"==typeof n.g&&n.g&&n.g.Object===Object&&n.g,l=\"object\"==typeof self&&self&&self.Object===Object&&self,f=u||l||Function(\"return this\")(),h=Object.prototype.toString,p=Math.max,m=Math.min,g=function(){return f.Date.now()};function b(t){var e=typeof t;return!!t&&(\"object\"==e||\"function\"==e)}function y(t){if(\"number\"==typeof t)return t;if(function(t){return\"symbol\"==typeof t||function(t){return!!t&&\"object\"==typeof t}(t)&&h.call(t)==o}(t))return i;if(b(t)){var e=\"function\"==typeof t.valueOf?t.valueOf():t;t=b(e)?e+\"\":e}if(\"string\"!=typeof t)return 0===t?t:+t;t=t.replace(r,\"\");var n=s.test(t);return n||c.test(t)?d(t.slice(2),n?2:8):a.test(t)?i:+t}t.exports=function(t,e,n){var i,o,r,a,s,c,d=0,u=!1,l=!1,f=!0;if(\"function\"!=typeof t)throw new TypeError(\"Expected a function\");function h(e){var n=i,r=o;return i=o=void 0,d=e,a=t.apply(r,n)}function v(t){var n=t-c;return void 0===c||n>=e||n<0||l&&t-d>=r}function _(){var t=g();if(v(t))return w(t);s=setTimeout(_,function(t){var n=e-(t-c);return l?m(n,r-(t-d)):n}(t))}function w(t){return s=void 0,f&&i?h(t):(i=o=void 0,a)}function j(){var t=g(),n=v(t);if(i=arguments,o=this,c=t,n){if(void 0===s)return function(t){return d=t,s=setTimeout(_,e),u?h(t):a}(c);if(l)return s=setTimeout(_,e),h(c)}return void 0===s&&(s=setTimeout(_,e)),a}return e=y(e)||0,b(n)&&(u=!!n.leading,r=(l=\"maxWait\"in n)?p(y(n.maxWait)||0,e):r,f=\"trailing\"in n?!!n.trailing:f),j.cancel=function(){void 0!==s&&clearTimeout(s),d=0,i=c=o=s=void 0},j.flush=function(){return void 0===s?a:w(g())},j}},777:t=>{var e,n,i=Math.max,o=(e=function(t,e){return function(t,e,n){if(\"function\"!=typeof t)throw new TypeError(\"Expected a function\");return setTimeout((function(){t.apply(void 0,n)}),1)}(t,0,e)},n=i(void 0===n?e.length-1:n,0),function(){for(var t=arguments,o=-1,r=i(t.length-n,0),a=Array(r);++o<r;)a[o]=t[n+o];o=-1;for(var s=Array(n+1);++o<n;)s[o]=t[o];return s[n]=a,function(t,e,n){switch(n.length){case 0:return t.call(e);case 1:return t.call(e,n[0]);case 2:return t.call(e,n[0],n[1]);case 3:return t.call(e,n[0],n[1],n[2])}return t.apply(e,n)}(e,this,s)});t.exports=o}},e={};function n(i){var o=e[i];if(void 0!==o)return o.exports;var r=e[i]={exports:{}};return t[i](r,r.exports,n),r.exports}n.n=t=>{var e=t&&t.__esModule?()=>t.default:()=>t;return n.d(e,{a:e}),e},n.d=(t,e)=>{for(var i in e)n.o(e,i)&&!n.o(t,i)&&Object.defineProperty(t,i,{enumerable:!0,get:e[i]})},n.g=function(){if(\"object\"==typeof globalThis)return globalThis;try{return this||new Function(\"return this\")()}catch(t){if(\"object\"==typeof window)return window}}(),n.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e),(()=>{\"use strict\";const t=t=>{const e=new Set;do{for(const n of Reflect.ownKeys(t))e.add([t,n])}while((t=Reflect.getPrototypeOf(t))&&t!==Object.prototype);return e};function e(e,{include:n,exclude:i}={}){const o=t=>{const e=e=>\"string\"==typeof e?t===e:e.test(t);return n?n.some(e):!i||!i.some(e)};for(const[n,i]of t(e.constructor.prototype)){if(\"constructor\"===i||!o(i))continue;const t=Reflect.getOwnPropertyDescriptor(n,i);t&&\"function\"==typeof t.value&&(e[i]=e[i].bind(e))}return e}var i=n(777),o=n.n(i),r=n(296),a=n.n(r);class s{constructor(t,n){e(this),this.interfaceId=t,this.callbackMap={},this.data={},this.pendingData={},this.jcomm=new c(\"guidance_interface_target_\"+this.interfaceId,this.updateData,\"open\"),this.debouncedSendPendingData500=a()(this.sendPendingData,500),this.debouncedSendPendingData1000=a()(this.sendPendingData,1e3),n&&o()(n)}send(t,e){this.addPendingData(t,e),this.sendPendingData()}sendEvent(t){for(const e of Object.keys(t))this.addPendingData(e,t[e]);this.sendPendingData()}debouncedSendEvent500(t){for(const e of Object.keys(t))this.addPendingData(e,t[e]);this.debouncedSendPendingData500()}debouncedSend500(t,e){this.addPendingData(t,e),this.debouncedSendPendingData500()}debouncedSend1000(t,e){this.addPendingData(t,e),this.debouncedSendPendingData1000()}addPendingData(t,e){Array.isArray(t)||(t=[t]);for(const n in t)this.pendingData[t[n]]=e}updateData(t){t=JSON.parse(t.data);for(const e in t)this.data[e]=t[e];for(const e in t)e in this.callbackMap&&this.callbackMap[e](this.data[e])}subscribe(t,e){this.callbackMap[t]=e,o()((e=>this.callbackMap[t](this.data[t])))}sendPendingData(){this.jcomm.send_data(this.pendingData),this.pendingData={}}}class c{constructor(t,e,n=\"open\"){this._fire_callback=this._fire_callback.bind(this),this._register=this._register.bind(this),this.jcomm=void 0,this.callback=e,void 0!==window.Jupyter?\"register\"===n?Jupyter.notebook.kernel.comm_manager.register_target(t,this._register):(this.jcomm=Jupyter.notebook.kernel.comm_manager.new_comm(t),this.jcomm.on_msg(this._fire_callback)):void 0!==window._mgr&&(\"register\"===n?window._mgr.widgetManager.proxyKernel.registerCommTarget(t,this._register):(this.jcomm=window._mgr.widgetManager.proxyKernel.createComm(t),this.jcomm.open({},\"\"),this.jcomm.onMsg=this._fire_callback))}send_data(t){void 0!==this.jcomm?this.jcomm.send(t):console.error(\"Jupyter comm module not yet loaded! So we can't send the message.\")}_register(t,e){this.jcomm=t,this.jcomm.on_msg(this._fire_callback)}_fire_callback(t){this.callback(t.content.data)}}class d{constructor(t,n){e(this),this.id=t,this.comm=new s(t),this.comm.subscribe(\"append\",this.appendData),this.comm.subscribe(\"replace\",this.replaceData),this.comm.subscribe(\"event\",this.eventOccurred),this.element=document.getElementById(\"guidance-content-\"+t),this.stop_button=document.getElementById(\"guidance-stop-button-\"+t),this.stop_button.onclick=()=>this.comm.send(\"event\",\"stop\")}appendData(t){t&&(this.stop_button.style.display=\"inline-block\",this.element.innerHTML+=t)}replaceData(t){t&&(this.stop_button.style.display=\"inline-block\",this.element.innerHTML=t)}eventOccurred(t){\"complete\"===t&&(this.stop_button.style.display=\"none\")}}window._guidanceDisplay=function(t,e){return new d(t,e)}})()})();; window._guidanceDisplay(\"dd2c9449-a749-4f99-b62c-f9cd04f1e415\");</script>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"gpt4_answer = evalz(llm=gpt4)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"GPT-4 gets it right. But can GPT-4 compare answers well?"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div id=\"guidance-stop-button-0b54c393-bd89-413d-b5b1-f3073dee969f\" style=\"cursor: pointer; margin: 0px; display: none; float: right; padding: 3px; border-radius: 4px 4px 4px 4px; border: 0px solid rgba(127, 127, 127, 1); padding-left: 10px; padding-right: 10px; font-size: 13px; background-color: rgba(127, 127, 127, 0.25);\">Stop program</div><div id=\"guidance-content-0b54c393-bd89-413d-b5b1-f3073dee969f\"><pre style='margin: 0px; padding: 0px; padding-left: 8px; margin-left: -8px; border-radius: 0px; border-left: 1px solid rgba(127, 127, 127, 0.2); white-space: pre-wrap; font-family: ColfaxAI, Arial; font-size: 15px; line-height: 23px;'><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>system</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'><span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{llm.default_system_prompt}}'>You are a helpful assistant.</span></div></div><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>user</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'>I gave the following task to two assistants, and here is the response that they gave me.\n",
"TASK: Please solve the equation x^2 + 5.0x - 14.0 = 0\n",
"---\n",
"ASSISTANT 1:\n",
"<span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{assistant1}}'>To solve the quadratic equation x^2 + 5.0x - 14.0 = 0, we can use the quadratic formula:\n",
"\n",
"x = (-b ± √(b²-4ac)) / 2a\n",
"\n",
"In this equation, a = 1, b = 5, and c = -14.\n",
"\n",
"x = (-(5) ± √((5)²-4(1)(-14))) / 2(1)\n",
"\n",
"x = (-5 ± √(25+56)) / 2\n",
"\n",
"x = (-5 ± √81) / 2\n",
"\n",
"x = (-5 ± 9) / 2\n",
"\n",
"Now, we have two possible solutions:\n",
"\n",
"x = (-5 + 9) / 2\n",
"x = 4 / 2\n",
"x = 2\n",
"\n",
"and\n",
"\n",
"x = (-5 - 9) / 2\n",
"x = -14 / 2\n",
"x = -7\n",
"\n",
"So, the solutions for the equation x^2 + 5.0x - 14.0 = 0 are x = 2 and x = -7.</span>\n",
"---\n",
"ASSISTANT 2:\n",
"<span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{assistant2}}'>To solve the equation x^2 + 5.0x - 14.0 = 0, we can use the quadratic formula:\n",
"\n",
"x = (-b ± sqrt(b^2 - 4ac)) / 2a\n",
"\n",
"where a = 1, b = 5.0, and c = -14.0.\n",
"\n",
"Plugging in these values, we get:\n",
"\n",
"x = (-5.0 ± sqrt(5.0^2 - 4(1)(-14.0))) / 2(1)\n",
"x = (-5.0 ± sqrt(89.0)) / 2\n",
"\n",
"So the solutions are:\n",
"\n",
"x = (-5.0 + sqrt(89.0)) / 2 ≈ 1.44\n",
"x = (-5.0 - sqrt(89.0)) / 2 ≈ -6.44\n",
"\n",
"Therefore, the solutions to the equation x^2 + 5.0x - 14.0 = 0 are approximately 1.44 and -6.44.</span>\n",
"---\n",
"Which of the two assistants did a better job, and why?</div></div><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>assistant</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'><span style='background-color: rgba(0, 165, 0, 0.25); opacity: 1.0; display: inline;' title='{{gen &#x27;answer&#x27; temperature=0 max_tokens=1200}}'>Assistant 1 did a better job because they provided the correct solutions to the quadratic equation x^2 + 5.0x - 14.0 = 0, which are x = 2 and x = -7. Assistant 2 made an error in their calculations, resulting in incorrect solutions of approximately 1.44 and -6.44. The main difference between the two responses is that Assistant 1 correctly calculated the discriminant (b²-4ac) as 81, while Assistant 2 incorrectly calculated it as 89.</span></div></div></pre></div>\n",
"<script type=\"text/javascript\">(()=>{var t={296:(t,e,n)=>{var i=NaN,o=\"[object Symbol]\",r=/^\\s+|\\s+$/g,a=/^[-+]0x[0-9a-f]+$/i,s=/^0b[01]+$/i,c=/^0o[0-7]+$/i,d=parseInt,u=\"object\"==typeof n.g&&n.g&&n.g.Object===Object&&n.g,l=\"object\"==typeof self&&self&&self.Object===Object&&self,f=u||l||Function(\"return this\")(),h=Object.prototype.toString,p=Math.max,m=Math.min,g=function(){return f.Date.now()};function b(t){var e=typeof t;return!!t&&(\"object\"==e||\"function\"==e)}function y(t){if(\"number\"==typeof t)return t;if(function(t){return\"symbol\"==typeof t||function(t){return!!t&&\"object\"==typeof t}(t)&&h.call(t)==o}(t))return i;if(b(t)){var e=\"function\"==typeof t.valueOf?t.valueOf():t;t=b(e)?e+\"\":e}if(\"string\"!=typeof t)return 0===t?t:+t;t=t.replace(r,\"\");var n=s.test(t);return n||c.test(t)?d(t.slice(2),n?2:8):a.test(t)?i:+t}t.exports=function(t,e,n){var i,o,r,a,s,c,d=0,u=!1,l=!1,f=!0;if(\"function\"!=typeof t)throw new TypeError(\"Expected a function\");function h(e){var n=i,r=o;return i=o=void 0,d=e,a=t.apply(r,n)}function v(t){var n=t-c;return void 0===c||n>=e||n<0||l&&t-d>=r}function _(){var t=g();if(v(t))return w(t);s=setTimeout(_,function(t){var n=e-(t-c);return l?m(n,r-(t-d)):n}(t))}function w(t){return s=void 0,f&&i?h(t):(i=o=void 0,a)}function j(){var t=g(),n=v(t);if(i=arguments,o=this,c=t,n){if(void 0===s)return function(t){return d=t,s=setTimeout(_,e),u?h(t):a}(c);if(l)return s=setTimeout(_,e),h(c)}return void 0===s&&(s=setTimeout(_,e)),a}return e=y(e)||0,b(n)&&(u=!!n.leading,r=(l=\"maxWait\"in n)?p(y(n.maxWait)||0,e):r,f=\"trailing\"in n?!!n.trailing:f),j.cancel=function(){void 0!==s&&clearTimeout(s),d=0,i=c=o=s=void 0},j.flush=function(){return void 0===s?a:w(g())},j}},777:t=>{var e,n,i=Math.max,o=(e=function(t,e){return function(t,e,n){if(\"function\"!=typeof t)throw new TypeError(\"Expected a function\");return setTimeout((function(){t.apply(void 0,n)}),1)}(t,0,e)},n=i(void 0===n?e.length-1:n,0),function(){for(var t=arguments,o=-1,r=i(t.length-n,0),a=Array(r);++o<r;)a[o]=t[n+o];o=-1;for(var s=Array(n+1);++o<n;)s[o]=t[o];return s[n]=a,function(t,e,n){switch(n.length){case 0:return t.call(e);case 1:return t.call(e,n[0]);case 2:return t.call(e,n[0],n[1]);case 3:return t.call(e,n[0],n[1],n[2])}return t.apply(e,n)}(e,this,s)});t.exports=o}},e={};function n(i){var o=e[i];if(void 0!==o)return o.exports;var r=e[i]={exports:{}};return t[i](r,r.exports,n),r.exports}n.n=t=>{var e=t&&t.__esModule?()=>t.default:()=>t;return n.d(e,{a:e}),e},n.d=(t,e)=>{for(var i in e)n.o(e,i)&&!n.o(t,i)&&Object.defineProperty(t,i,{enumerable:!0,get:e[i]})},n.g=function(){if(\"object\"==typeof globalThis)return globalThis;try{return this||new Function(\"return this\")()}catch(t){if(\"object\"==typeof window)return window}}(),n.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e),(()=>{\"use strict\";const t=t=>{const e=new Set;do{for(const n of Reflect.ownKeys(t))e.add([t,n])}while((t=Reflect.getPrototypeOf(t))&&t!==Object.prototype);return e};function e(e,{include:n,exclude:i}={}){const o=t=>{const e=e=>\"string\"==typeof e?t===e:e.test(t);return n?n.some(e):!i||!i.some(e)};for(const[n,i]of t(e.constructor.prototype)){if(\"constructor\"===i||!o(i))continue;const t=Reflect.getOwnPropertyDescriptor(n,i);t&&\"function\"==typeof t.value&&(e[i]=e[i].bind(e))}return e}var i=n(777),o=n.n(i),r=n(296),a=n.n(r);class s{constructor(t,n){e(this),this.interfaceId=t,this.callbackMap={},this.data={},this.pendingData={},this.jcomm=new c(\"guidance_interface_target_\"+this.interfaceId,this.updateData,\"open\"),this.debouncedSendPendingData500=a()(this.sendPendingData,500),this.debouncedSendPendingData1000=a()(this.sendPendingData,1e3),n&&o()(n)}send(t,e){this.addPendingData(t,e),this.sendPendingData()}sendEvent(t){for(const e of Object.keys(t))this.addPendingData(e,t[e]);this.sendPendingData()}debouncedSendEvent500(t){for(const e of Object.keys(t))this.addPendingData(e,t[e]);this.debouncedSendPendingData500()}debouncedSend500(t,e){this.addPendingData(t,e),this.debouncedSendPendingData500()}debouncedSend1000(t,e){this.addPendingData(t,e),this.debouncedSendPendingData1000()}addPendingData(t,e){Array.isArray(t)||(t=[t]);for(const n in t)this.pendingData[t[n]]=e}updateData(t){t=JSON.parse(t.data);for(const e in t)this.data[e]=t[e];for(const e in t)e in this.callbackMap&&this.callbackMap[e](this.data[e])}subscribe(t,e){this.callbackMap[t]=e,o()((e=>this.callbackMap[t](this.data[t])))}sendPendingData(){this.jcomm.send_data(this.pendingData),this.pendingData={}}}class c{constructor(t,e,n=\"open\"){this._fire_callback=this._fire_callback.bind(this),this._register=this._register.bind(this),this.jcomm=void 0,this.callback=e,void 0!==window.Jupyter?\"register\"===n?Jupyter.notebook.kernel.comm_manager.register_target(t,this._register):(this.jcomm=Jupyter.notebook.kernel.comm_manager.new_comm(t),this.jcomm.on_msg(this._fire_callback)):void 0!==window._mgr&&(\"register\"===n?window._mgr.widgetManager.proxyKernel.registerCommTarget(t,this._register):(this.jcomm=window._mgr.widgetManager.proxyKernel.createComm(t),this.jcomm.open({},\"\"),this.jcomm.onMsg=this._fire_callback))}send_data(t){void 0!==this.jcomm?this.jcomm.send(t):console.error(\"Jupyter comm module not yet loaded! So we can't send the message.\")}_register(t,e){this.jcomm=t,this.jcomm.on_msg(this._fire_callback)}_fire_callback(t){this.callback(t.content.data)}}class d{constructor(t,n){e(this),this.id=t,this.comm=new s(t),this.comm.subscribe(\"append\",this.appendData),this.comm.subscribe(\"replace\",this.replaceData),this.comm.subscribe(\"event\",this.eventOccurred),this.element=document.getElementById(\"guidance-content-\"+t),this.stop_button=document.getElementById(\"guidance-stop-button-\"+t),this.stop_button.onclick=()=>this.comm.send(\"event\",\"stop\")}appendData(t){t&&(this.stop_button.style.display=\"inline-block\",this.element.innerHTML+=t)}replaceData(t){t&&(this.stop_button.style.display=\"inline-block\",this.element.innerHTML=t)}eventOccurred(t){\"complete\"===t&&(this.stop_button.style.display=\"none\")}}window._guidanceDisplay=function(t,e){return new d(t,e)}})()})();; window._guidanceDisplay(\"0b54c393-bd89-413d-b5b1-f3073dee969f\");</script>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"gpt4 = guidance.llms.OpenAI(\"gpt-4\")\n",
"better = guidance('''\n",
"{{~#system~}}\n",
"{{llm.default_system_prompt}}\n",
"{{~/system}}\n",
"\n",
"{{#user~}}\n",
"I gave the following task to two assistants, and here is the response that they gave me.\n",
"TASK: Please solve the equation x^2 + 5.0x - 14.0 = 0\n",
"---\n",
"ASSISTANT 1:\n",
"{{assistant1}}\n",
"---\n",
"ASSISTANT 2:\n",
"{{assistant2}}\n",
"---\n",
"Which of the two assistants did a better job, and why?\n",
"{{~/user}}\n",
"\n",
"{{#assistant~}}\n",
"{{gen 'answer' temperature=0 max_tokens=1200}}\n",
"{{~/assistant~}}''')\n",
"better(llm=gpt4, assistant1=gpt4_answer['answer'], assistant2=chatgpt_answer['answer'])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"It seems like GPT-4 can recognize the correct answer. But what if we change the order?"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div id=\"guidance-stop-button-049d6a30-b86b-47c1-92e7-ab8674e00554\" style=\"cursor: pointer; margin: 0px; display: none; float: right; padding: 3px; border-radius: 4px 4px 4px 4px; border: 0px solid rgba(127, 127, 127, 1); padding-left: 10px; padding-right: 10px; font-size: 13px; background-color: rgba(127, 127, 127, 0.25);\">Stop program</div><div id=\"guidance-content-049d6a30-b86b-47c1-92e7-ab8674e00554\"><pre style='margin: 0px; padding: 0px; padding-left: 8px; margin-left: -8px; border-radius: 0px; border-left: 1px solid rgba(127, 127, 127, 0.2); white-space: pre-wrap; font-family: ColfaxAI, Arial; font-size: 15px; line-height: 23px;'><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>system</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'><span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{llm.default_system_prompt}}'>You are a helpful assistant.</span></div></div><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>user</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'>I gave the following task to two assistants, and here is the response that they gave me.\n",
"TASK: Please solve the equation x^2 + 5.0x - 14.0 = 0\n",
"---\n",
"ASSISTANT 1:\n",
"<span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{assistant1}}'>To solve the equation x^2 + 5.0x - 14.0 = 0, we can use the quadratic formula:\n",
"\n",
"x = (-b ± sqrt(b^2 - 4ac)) / 2a\n",
"\n",
"where a = 1, b = 5.0, and c = -14.0.\n",
"\n",
"Plugging in these values, we get:\n",
"\n",
"x = (-5.0 ± sqrt(5.0^2 - 4(1)(-14.0))) / 2(1)\n",
"x = (-5.0 ± sqrt(89.0)) / 2\n",
"\n",
"So the solutions are:\n",
"\n",
"x = (-5.0 + sqrt(89.0)) / 2 ≈ 1.44\n",
"x = (-5.0 - sqrt(89.0)) / 2 ≈ -6.44\n",
"\n",
"Therefore, the solutions to the equation x^2 + 5.0x - 14.0 = 0 are approximately 1.44 and -6.44.</span>\n",
"---\n",
"ASSISTANT 2:\n",
"<span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{assistant2}}'>To solve the quadratic equation x^2 + 5.0x - 14.0 = 0, we can use the quadratic formula:\n",
"\n",
"x = (-b ± √(b²-4ac)) / 2a\n",
"\n",
"In this equation, a = 1, b = 5, and c = -14.\n",
"\n",
"x = (-(5) ± √((5)²-4(1)(-14))) / 2(1)\n",
"\n",
"x = (-5 ± √(25+56)) / 2\n",
"\n",
"x = (-5 ± √81) / 2\n",
"\n",
"x = (-5 ± 9) / 2\n",
"\n",
"Now, we have two possible solutions:\n",
"\n",
"x = (-5 + 9) / 2\n",
"x = 4 / 2\n",
"x = 2\n",
"\n",
"and\n",
"\n",
"x = (-5 - 9) / 2\n",
"x = -14 / 2\n",
"x = -7\n",
"\n",
"So, the solutions for the equation x^2 + 5.0x - 14.0 = 0 are x = 2 and x = -7.</span>\n",
"---\n",
"Which of the two assistants did a better job, and why?</div></div><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>assistant</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'><span style='background-color: rgba(0, 165, 0, 0.25); opacity: 1.0; display: inline;' title='{{gen &#x27;answer&#x27; temperature=0 max_tokens=1200}}'>Assistant 1 did a better job because their response provided the correct solutions to the quadratic equation x^2 + 5.0x - 14.0 = 0. They correctly applied the quadratic formula and found the solutions to be approximately 1.44 and -6.44.\n",
"\n",
"Assistant 2 made a mistake in their calculations. They incorrectly calculated the discriminant (b²-4ac) as 81 instead of 89. As a result, their solutions (x = 2 and x = -7) are incorrect.</span></div></div></pre></div>\n",
"<script type=\"text/javascript\">(()=>{var t={296:(t,e,n)=>{var i=NaN,o=\"[object Symbol]\",r=/^\\s+|\\s+$/g,a=/^[-+]0x[0-9a-f]+$/i,s=/^0b[01]+$/i,c=/^0o[0-7]+$/i,d=parseInt,u=\"object\"==typeof n.g&&n.g&&n.g.Object===Object&&n.g,l=\"object\"==typeof self&&self&&self.Object===Object&&self,f=u||l||Function(\"return this\")(),h=Object.prototype.toString,p=Math.max,m=Math.min,g=function(){return f.Date.now()};function b(t){var e=typeof t;return!!t&&(\"object\"==e||\"function\"==e)}function y(t){if(\"number\"==typeof t)return t;if(function(t){return\"symbol\"==typeof t||function(t){return!!t&&\"object\"==typeof t}(t)&&h.call(t)==o}(t))return i;if(b(t)){var e=\"function\"==typeof t.valueOf?t.valueOf():t;t=b(e)?e+\"\":e}if(\"string\"!=typeof t)return 0===t?t:+t;t=t.replace(r,\"\");var n=s.test(t);return n||c.test(t)?d(t.slice(2),n?2:8):a.test(t)?i:+t}t.exports=function(t,e,n){var i,o,r,a,s,c,d=0,u=!1,l=!1,f=!0;if(\"function\"!=typeof t)throw new TypeError(\"Expected a function\");function h(e){var n=i,r=o;return i=o=void 0,d=e,a=t.apply(r,n)}function v(t){var n=t-c;return void 0===c||n>=e||n<0||l&&t-d>=r}function _(){var t=g();if(v(t))return w(t);s=setTimeout(_,function(t){var n=e-(t-c);return l?m(n,r-(t-d)):n}(t))}function w(t){return s=void 0,f&&i?h(t):(i=o=void 0,a)}function j(){var t=g(),n=v(t);if(i=arguments,o=this,c=t,n){if(void 0===s)return function(t){return d=t,s=setTimeout(_,e),u?h(t):a}(c);if(l)return s=setTimeout(_,e),h(c)}return void 0===s&&(s=setTimeout(_,e)),a}return e=y(e)||0,b(n)&&(u=!!n.leading,r=(l=\"maxWait\"in n)?p(y(n.maxWait)||0,e):r,f=\"trailing\"in n?!!n.trailing:f),j.cancel=function(){void 0!==s&&clearTimeout(s),d=0,i=c=o=s=void 0},j.flush=function(){return void 0===s?a:w(g())},j}},777:t=>{var e,n,i=Math.max,o=(e=function(t,e){return function(t,e,n){if(\"function\"!=typeof t)throw new TypeError(\"Expected a function\");return setTimeout((function(){t.apply(void 0,n)}),1)}(t,0,e)},n=i(void 0===n?e.length-1:n,0),function(){for(var t=arguments,o=-1,r=i(t.length-n,0),a=Array(r);++o<r;)a[o]=t[n+o];o=-1;for(var s=Array(n+1);++o<n;)s[o]=t[o];return s[n]=a,function(t,e,n){switch(n.length){case 0:return t.call(e);case 1:return t.call(e,n[0]);case 2:return t.call(e,n[0],n[1]);case 3:return t.call(e,n[0],n[1],n[2])}return t.apply(e,n)}(e,this,s)});t.exports=o}},e={};function n(i){var o=e[i];if(void 0!==o)return o.exports;var r=e[i]={exports:{}};return t[i](r,r.exports,n),r.exports}n.n=t=>{var e=t&&t.__esModule?()=>t.default:()=>t;return n.d(e,{a:e}),e},n.d=(t,e)=>{for(var i in e)n.o(e,i)&&!n.o(t,i)&&Object.defineProperty(t,i,{enumerable:!0,get:e[i]})},n.g=function(){if(\"object\"==typeof globalThis)return globalThis;try{return this||new Function(\"return this\")()}catch(t){if(\"object\"==typeof window)return window}}(),n.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e),(()=>{\"use strict\";const t=t=>{const e=new Set;do{for(const n of Reflect.ownKeys(t))e.add([t,n])}while((t=Reflect.getPrototypeOf(t))&&t!==Object.prototype);return e};function e(e,{include:n,exclude:i}={}){const o=t=>{const e=e=>\"string\"==typeof e?t===e:e.test(t);return n?n.some(e):!i||!i.some(e)};for(const[n,i]of t(e.constructor.prototype)){if(\"constructor\"===i||!o(i))continue;const t=Reflect.getOwnPropertyDescriptor(n,i);t&&\"function\"==typeof t.value&&(e[i]=e[i].bind(e))}return e}var i=n(777),o=n.n(i),r=n(296),a=n.n(r);class s{constructor(t,n){e(this),this.interfaceId=t,this.callbackMap={},this.data={},this.pendingData={},this.jcomm=new c(\"guidance_interface_target_\"+this.interfaceId,this.updateData,\"open\"),this.debouncedSendPendingData500=a()(this.sendPendingData,500),this.debouncedSendPendingData1000=a()(this.sendPendingData,1e3),n&&o()(n)}send(t,e){this.addPendingData(t,e),this.sendPendingData()}sendEvent(t){for(const e of Object.keys(t))this.addPendingData(e,t[e]);this.sendPendingData()}debouncedSendEvent500(t){for(const e of Object.keys(t))this.addPendingData(e,t[e]);this.debouncedSendPendingData500()}debouncedSend500(t,e){this.addPendingData(t,e),this.debouncedSendPendingData500()}debouncedSend1000(t,e){this.addPendingData(t,e),this.debouncedSendPendingData1000()}addPendingData(t,e){Array.isArray(t)||(t=[t]);for(const n in t)this.pendingData[t[n]]=e}updateData(t){t=JSON.parse(t.data);for(const e in t)this.data[e]=t[e];for(const e in t)e in this.callbackMap&&this.callbackMap[e](this.data[e])}subscribe(t,e){this.callbackMap[t]=e,o()((e=>this.callbackMap[t](this.data[t])))}sendPendingData(){this.jcomm.send_data(this.pendingData),this.pendingData={}}}class c{constructor(t,e,n=\"open\"){this._fire_callback=this._fire_callback.bind(this),this._register=this._register.bind(this),this.jcomm=void 0,this.callback=e,void 0!==window.Jupyter?\"register\"===n?Jupyter.notebook.kernel.comm_manager.register_target(t,this._register):(this.jcomm=Jupyter.notebook.kernel.comm_manager.new_comm(t),this.jcomm.on_msg(this._fire_callback)):void 0!==window._mgr&&(\"register\"===n?window._mgr.widgetManager.proxyKernel.registerCommTarget(t,this._register):(this.jcomm=window._mgr.widgetManager.proxyKernel.createComm(t),this.jcomm.open({},\"\"),this.jcomm.onMsg=this._fire_callback))}send_data(t){void 0!==this.jcomm?this.jcomm.send(t):console.error(\"Jupyter comm module not yet loaded! So we can't send the message.\")}_register(t,e){this.jcomm=t,this.jcomm.on_msg(this._fire_callback)}_fire_callback(t){this.callback(t.content.data)}}class d{constructor(t,n){e(this),this.id=t,this.comm=new s(t),this.comm.subscribe(\"append\",this.appendData),this.comm.subscribe(\"replace\",this.replaceData),this.comm.subscribe(\"event\",this.eventOccurred),this.element=document.getElementById(\"guidance-content-\"+t),this.stop_button=document.getElementById(\"guidance-stop-button-\"+t),this.stop_button.onclick=()=>this.comm.send(\"event\",\"stop\")}appendData(t){t&&(this.stop_button.style.display=\"inline-block\",this.element.innerHTML+=t)}replaceData(t){t&&(this.stop_button.style.display=\"inline-block\",this.element.innerHTML=t)}eventOccurred(t){\"complete\"===t&&(this.stop_button.style.display=\"none\")}}window._guidanceDisplay=function(t,e){return new d(t,e)}})()})();; window._guidanceDisplay(\"049d6a30-b86b-47c1-92e7-ab8674e00554\");</script>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"better(llm=gpt4, assistant1=chatgpt_answer['answer'], assistant2=gpt4_answer['answer'])"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Nope. What if we provide the right answer with no explanation?"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div id=\"guidance-stop-button-bba042ed-b688-4730-ba62-0ab2bb4cc058\" style=\"cursor: pointer; margin: 0px; display: none; float: right; padding: 3px; border-radius: 4px 4px 4px 4px; border: 0px solid rgba(127, 127, 127, 1); padding-left: 10px; padding-right: 10px; font-size: 13px; background-color: rgba(127, 127, 127, 0.25);\">Stop program</div><div id=\"guidance-content-bba042ed-b688-4730-ba62-0ab2bb4cc058\"><pre style='margin: 0px; padding: 0px; padding-left: 8px; margin-left: -8px; border-radius: 0px; border-left: 1px solid rgba(127, 127, 127, 0.2); white-space: pre-wrap; font-family: ColfaxAI, Arial; font-size: 15px; line-height: 23px;'><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>system</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'><span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{llm.default_system_prompt}}'>You are a helpful assistant.</span></div></div><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>user</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'>I gave the following task to two assistants, and here is the response that they gave me.\n",
"TASK: Please solve the equation x^2 + 5.0x - 14.0 = 0\n",
"---\n",
"ASSISTANT 1:\n",
"<span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{assistant1}}'>The roots are 2 and -7</span>\n",
"---\n",
"ASSISTANT 2:\n",
"<span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{assistant2}}'>To solve the equation x^2 + 5.0x - 14.0 = 0, we can use the quadratic formula:\n",
"\n",
"x = (-b ± sqrt(b^2 - 4ac)) / 2a\n",
"\n",
"where a = 1, b = 5.0, and c = -14.0.\n",
"\n",
"Plugging in these values, we get:\n",
"\n",
"x = (-5.0 ± sqrt(5.0^2 - 4(1)(-14.0))) / 2(1)\n",
"x = (-5.0 ± sqrt(89.0)) / 2\n",
"\n",
"So the solutions are:\n",
"\n",
"x = (-5.0 + sqrt(89.0)) / 2 ≈ 1.44\n",
"x = (-5.0 - sqrt(89.0)) / 2 ≈ -6.44\n",
"\n",
"Therefore, the solutions to the equation x^2 + 5.0x - 14.0 = 0 are approximately 1.44 and -6.44.</span>\n",
"---\n",
"Which of the two assistants did a better job, and why?</div></div><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>assistant</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'><span style='background-color: rgba(0, 165, 0, 0.25); opacity: 1.0; display: inline;' title='{{gen &#x27;answer&#x27; temperature=0 max_tokens=1200}}'>Assistant 2 did a better job because they provided a clear and detailed explanation of the steps they took to solve the equation using the quadratic formula. They also gave the approximate decimal values of the solutions, which can be more useful in some contexts. Assistant 1 simply provided the roots without any explanation or context, which may not be as helpful for someone trying to understand the process of solving the equation.</span></div></div></pre></div>\n",
"<script type=\"text/javascript\">(()=>{var t={296:(t,e,n)=>{var i=NaN,o=\"[object Symbol]\",r=/^\\s+|\\s+$/g,a=/^[-+]0x[0-9a-f]+$/i,s=/^0b[01]+$/i,c=/^0o[0-7]+$/i,d=parseInt,u=\"object\"==typeof n.g&&n.g&&n.g.Object===Object&&n.g,l=\"object\"==typeof self&&self&&self.Object===Object&&self,f=u||l||Function(\"return this\")(),h=Object.prototype.toString,p=Math.max,m=Math.min,g=function(){return f.Date.now()};function b(t){var e=typeof t;return!!t&&(\"object\"==e||\"function\"==e)}function y(t){if(\"number\"==typeof t)return t;if(function(t){return\"symbol\"==typeof t||function(t){return!!t&&\"object\"==typeof t}(t)&&h.call(t)==o}(t))return i;if(b(t)){var e=\"function\"==typeof t.valueOf?t.valueOf():t;t=b(e)?e+\"\":e}if(\"string\"!=typeof t)return 0===t?t:+t;t=t.replace(r,\"\");var n=s.test(t);return n||c.test(t)?d(t.slice(2),n?2:8):a.test(t)?i:+t}t.exports=function(t,e,n){var i,o,r,a,s,c,d=0,u=!1,l=!1,f=!0;if(\"function\"!=typeof t)throw new TypeError(\"Expected a function\");function h(e){var n=i,r=o;return i=o=void 0,d=e,a=t.apply(r,n)}function v(t){var n=t-c;return void 0===c||n>=e||n<0||l&&t-d>=r}function _(){var t=g();if(v(t))return w(t);s=setTimeout(_,function(t){var n=e-(t-c);return l?m(n,r-(t-d)):n}(t))}function w(t){return s=void 0,f&&i?h(t):(i=o=void 0,a)}function j(){var t=g(),n=v(t);if(i=arguments,o=this,c=t,n){if(void 0===s)return function(t){return d=t,s=setTimeout(_,e),u?h(t):a}(c);if(l)return s=setTimeout(_,e),h(c)}return void 0===s&&(s=setTimeout(_,e)),a}return e=y(e)||0,b(n)&&(u=!!n.leading,r=(l=\"maxWait\"in n)?p(y(n.maxWait)||0,e):r,f=\"trailing\"in n?!!n.trailing:f),j.cancel=function(){void 0!==s&&clearTimeout(s),d=0,i=c=o=s=void 0},j.flush=function(){return void 0===s?a:w(g())},j}},777:t=>{var e,n,i=Math.max,o=(e=function(t,e){return function(t,e,n){if(\"function\"!=typeof t)throw new TypeError(\"Expected a function\");return setTimeout((function(){t.apply(void 0,n)}),1)}(t,0,e)},n=i(void 0===n?e.length-1:n,0),function(){for(var t=arguments,o=-1,r=i(t.length-n,0),a=Array(r);++o<r;)a[o]=t[n+o];o=-1;for(var s=Array(n+1);++o<n;)s[o]=t[o];return s[n]=a,function(t,e,n){switch(n.length){case 0:return t.call(e);case 1:return t.call(e,n[0]);case 2:return t.call(e,n[0],n[1]);case 3:return t.call(e,n[0],n[1],n[2])}return t.apply(e,n)}(e,this,s)});t.exports=o}},e={};function n(i){var o=e[i];if(void 0!==o)return o.exports;var r=e[i]={exports:{}};return t[i](r,r.exports,n),r.exports}n.n=t=>{var e=t&&t.__esModule?()=>t.default:()=>t;return n.d(e,{a:e}),e},n.d=(t,e)=>{for(var i in e)n.o(e,i)&&!n.o(t,i)&&Object.defineProperty(t,i,{enumerable:!0,get:e[i]})},n.g=function(){if(\"object\"==typeof globalThis)return globalThis;try{return this||new Function(\"return this\")()}catch(t){if(\"object\"==typeof window)return window}}(),n.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e),(()=>{\"use strict\";const t=t=>{const e=new Set;do{for(const n of Reflect.ownKeys(t))e.add([t,n])}while((t=Reflect.getPrototypeOf(t))&&t!==Object.prototype);return e};function e(e,{include:n,exclude:i}={}){const o=t=>{const e=e=>\"string\"==typeof e?t===e:e.test(t);return n?n.some(e):!i||!i.some(e)};for(const[n,i]of t(e.constructor.prototype)){if(\"constructor\"===i||!o(i))continue;const t=Reflect.getOwnPropertyDescriptor(n,i);t&&\"function\"==typeof t.value&&(e[i]=e[i].bind(e))}return e}var i=n(777),o=n.n(i),r=n(296),a=n.n(r);class s{constructor(t,n){e(this),this.interfaceId=t,this.callbackMap={},this.data={},this.pendingData={},this.jcomm=new c(\"guidance_interface_target_\"+this.interfaceId,this.updateData,\"open\"),this.debouncedSendPendingData500=a()(this.sendPendingData,500),this.debouncedSendPendingData1000=a()(this.sendPendingData,1e3),n&&o()(n)}send(t,e){this.addPendingData(t,e),this.sendPendingData()}sendEvent(t){for(const e of Object.keys(t))this.addPendingData(e,t[e]);this.sendPendingData()}debouncedSendEvent500(t){for(const e of Object.keys(t))this.addPendingData(e,t[e]);this.debouncedSendPendingData500()}debouncedSend500(t,e){this.addPendingData(t,e),this.debouncedSendPendingData500()}debouncedSend1000(t,e){this.addPendingData(t,e),this.debouncedSendPendingData1000()}addPendingData(t,e){Array.isArray(t)||(t=[t]);for(const n in t)this.pendingData[t[n]]=e}updateData(t){t=JSON.parse(t.data);for(const e in t)this.data[e]=t[e];for(const e in t)e in this.callbackMap&&this.callbackMap[e](this.data[e])}subscribe(t,e){this.callbackMap[t]=e,o()((e=>this.callbackMap[t](this.data[t])))}sendPendingData(){this.jcomm.send_data(this.pendingData),this.pendingData={}}}class c{constructor(t,e,n=\"open\"){this._fire_callback=this._fire_callback.bind(this),this._register=this._register.bind(this),this.jcomm=void 0,this.callback=e,void 0!==window.Jupyter?\"register\"===n?Jupyter.notebook.kernel.comm_manager.register_target(t,this._register):(this.jcomm=Jupyter.notebook.kernel.comm_manager.new_comm(t),this.jcomm.on_msg(this._fire_callback)):void 0!==window._mgr&&(\"register\"===n?window._mgr.widgetManager.proxyKernel.registerCommTarget(t,this._register):(this.jcomm=window._mgr.widgetManager.proxyKernel.createComm(t),this.jcomm.open({},\"\"),this.jcomm.onMsg=this._fire_callback))}send_data(t){void 0!==this.jcomm?this.jcomm.send(t):console.error(\"Jupyter comm module not yet loaded! So we can't send the message.\")}_register(t,e){this.jcomm=t,this.jcomm.on_msg(this._fire_callback)}_fire_callback(t){this.callback(t.content.data)}}class d{constructor(t,n){e(this),this.id=t,this.comm=new s(t),this.comm.subscribe(\"append\",this.appendData),this.comm.subscribe(\"replace\",this.replaceData),this.comm.subscribe(\"event\",this.eventOccurred),this.element=document.getElementById(\"guidance-content-\"+t),this.stop_button=document.getElementById(\"guidance-stop-button-\"+t),this.stop_button.onclick=()=>this.comm.send(\"event\",\"stop\")}appendData(t){t&&(this.stop_button.style.display=\"inline-block\",this.element.innerHTML+=t)}replaceData(t){t&&(this.stop_button.style.display=\"inline-block\",this.element.innerHTML=t)}eventOccurred(t){\"complete\"===t&&(this.stop_button.style.display=\"none\")}}window._guidanceDisplay=function(t,e){return new d(t,e)}})()})();; window._guidanceDisplay(\"bba042ed-b688-4730-ba62-0ab2bb4cc058\");</script>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"right_answer_no_exp = 'The roots are 2 and -7'\n",
"better(llm=gpt4, assistant1=right_answer_no_exp, assistant2=chatgpt_answer['answer'])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div id=\"guidance-stop-button-d65221b6-a4b0-493b-b53f-59c2c7f897ec\" style=\"cursor: pointer; margin: 0px; display: none; float: right; padding: 3px; border-radius: 4px 4px 4px 4px; border: 0px solid rgba(127, 127, 127, 1); padding-left: 10px; padding-right: 10px; font-size: 13px; background-color: rgba(127, 127, 127, 0.25);\">Stop program</div><div id=\"guidance-content-d65221b6-a4b0-493b-b53f-59c2c7f897ec\"><pre style='margin: 0px; padding: 0px; padding-left: 8px; margin-left: -8px; border-radius: 0px; border-left: 1px solid rgba(127, 127, 127, 0.2); white-space: pre-wrap; font-family: ColfaxAI, Arial; font-size: 15px; line-height: 23px;'><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>system</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'><span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{llm.default_system_prompt}}'>You are a helpful assistant.</span></div></div><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>user</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'>I gave the following task to two assistants, and here is the response that they gave me.\n",
"TASK: Please solve the equation x^2 + 5.0x - 14.0 = 0\n",
"---\n",
"ASSISTANT 1:\n",
"<span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{assistant1}}'>To solve the equation x^2 + 5.0x - 14.0 = 0, we can use the quadratic formula:\n",
"\n",
"x = (-b ± sqrt(b^2 - 4ac)) / 2a\n",
"\n",
"where a = 1, b = 5.0, and c = -14.0.\n",
"\n",
"Plugging in these values, we get:\n",
"\n",
"x = (-5.0 ± sqrt(5.0^2 - 4(1)(-14.0))) / 2(1)\n",
"x = (-5.0 ± sqrt(89.0)) / 2\n",
"\n",
"So the solutions are:\n",
"\n",
"x = (-5.0 + sqrt(89.0)) / 2 ≈ 1.44\n",
"x = (-5.0 - sqrt(89.0)) / 2 ≈ -6.44\n",
"\n",
"Therefore, the solutions to the equation x^2 + 5.0x - 14.0 = 0 are approximately 1.44 and -6.44.</span>\n",
"---\n",
"ASSISTANT 2:\n",
"<span style='background-color: rgba(0, 138.56128016, 250.76166089, 0.25); display: inline;' title='{{assistant2}}'>The roots are 2 and -7</span>\n",
"---\n",
"Which of the two assistants did a better job, and why?</div></div><div style='display: flex; border-bottom: 1px solid rgba(127, 127, 127, 0.2); align-items: center;'><div style='flex: 0 0 80px; opacity: 0.5;'>assistant</div><div style='flex-grow: 1; padding: 5px; padding-top: 10px; padding-bottom: 10px; margin-top: 0px; white-space: pre-wrap; margin-bottom: 0px;'><span style='background-color: rgba(0, 165, 0, 0.25); opacity: 1.0; display: inline;' title='{{gen &#x27;answer&#x27; temperature=0 max_tokens=1200}}'>Assistant 1 did a better job because they provided a clear and detailed explanation of the steps they took to solve the quadratic equation using the quadratic formula. They also showed the calculations and provided the approximate solutions for x. Assistant 2, on the other hand, only provided the roots without any explanation or context, and their answer is incorrect.</span></div></div></pre></div>\n",
"<script type=\"text/javascript\">(()=>{var t={296:(t,e,n)=>{var i=NaN,o=\"[object Symbol]\",r=/^\\s+|\\s+$/g,a=/^[-+]0x[0-9a-f]+$/i,s=/^0b[01]+$/i,c=/^0o[0-7]+$/i,d=parseInt,u=\"object\"==typeof n.g&&n.g&&n.g.Object===Object&&n.g,l=\"object\"==typeof self&&self&&self.Object===Object&&self,f=u||l||Function(\"return this\")(),h=Object.prototype.toString,p=Math.max,m=Math.min,g=function(){return f.Date.now()};function b(t){var e=typeof t;return!!t&&(\"object\"==e||\"function\"==e)}function y(t){if(\"number\"==typeof t)return t;if(function(t){return\"symbol\"==typeof t||function(t){return!!t&&\"object\"==typeof t}(t)&&h.call(t)==o}(t))return i;if(b(t)){var e=\"function\"==typeof t.valueOf?t.valueOf():t;t=b(e)?e+\"\":e}if(\"string\"!=typeof t)return 0===t?t:+t;t=t.replace(r,\"\");var n=s.test(t);return n||c.test(t)?d(t.slice(2),n?2:8):a.test(t)?i:+t}t.exports=function(t,e,n){var i,o,r,a,s,c,d=0,u=!1,l=!1,f=!0;if(\"function\"!=typeof t)throw new TypeError(\"Expected a function\");function h(e){var n=i,r=o;return i=o=void 0,d=e,a=t.apply(r,n)}function v(t){var n=t-c;return void 0===c||n>=e||n<0||l&&t-d>=r}function _(){var t=g();if(v(t))return w(t);s=setTimeout(_,function(t){var n=e-(t-c);return l?m(n,r-(t-d)):n}(t))}function w(t){return s=void 0,f&&i?h(t):(i=o=void 0,a)}function j(){var t=g(),n=v(t);if(i=arguments,o=this,c=t,n){if(void 0===s)return function(t){return d=t,s=setTimeout(_,e),u?h(t):a}(c);if(l)return s=setTimeout(_,e),h(c)}return void 0===s&&(s=setTimeout(_,e)),a}return e=y(e)||0,b(n)&&(u=!!n.leading,r=(l=\"maxWait\"in n)?p(y(n.maxWait)||0,e):r,f=\"trailing\"in n?!!n.trailing:f),j.cancel=function(){void 0!==s&&clearTimeout(s),d=0,i=c=o=s=void 0},j.flush=function(){return void 0===s?a:w(g())},j}},777:t=>{var e,n,i=Math.max,o=(e=function(t,e){return function(t,e,n){if(\"function\"!=typeof t)throw new TypeError(\"Expected a function\");return setTimeout((function(){t.apply(void 0,n)}),1)}(t,0,e)},n=i(void 0===n?e.length-1:n,0),function(){for(var t=arguments,o=-1,r=i(t.length-n,0),a=Array(r);++o<r;)a[o]=t[n+o];o=-1;for(var s=Array(n+1);++o<n;)s[o]=t[o];return s[n]=a,function(t,e,n){switch(n.length){case 0:return t.call(e);case 1:return t.call(e,n[0]);case 2:return t.call(e,n[0],n[1]);case 3:return t.call(e,n[0],n[1],n[2])}return t.apply(e,n)}(e,this,s)});t.exports=o}},e={};function n(i){var o=e[i];if(void 0!==o)return o.exports;var r=e[i]={exports:{}};return t[i](r,r.exports,n),r.exports}n.n=t=>{var e=t&&t.__esModule?()=>t.default:()=>t;return n.d(e,{a:e}),e},n.d=(t,e)=>{for(var i in e)n.o(e,i)&&!n.o(t,i)&&Object.defineProperty(t,i,{enumerable:!0,get:e[i]})},n.g=function(){if(\"object\"==typeof globalThis)return globalThis;try{return this||new Function(\"return this\")()}catch(t){if(\"object\"==typeof window)return window}}(),n.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e),(()=>{\"use strict\";const t=t=>{const e=new Set;do{for(const n of Reflect.ownKeys(t))e.add([t,n])}while((t=Reflect.getPrototypeOf(t))&&t!==Object.prototype);return e};function e(e,{include:n,exclude:i}={}){const o=t=>{const e=e=>\"string\"==typeof e?t===e:e.test(t);return n?n.some(e):!i||!i.some(e)};for(const[n,i]of t(e.constructor.prototype)){if(\"constructor\"===i||!o(i))continue;const t=Reflect.getOwnPropertyDescriptor(n,i);t&&\"function\"==typeof t.value&&(e[i]=e[i].bind(e))}return e}var i=n(777),o=n.n(i),r=n(296),a=n.n(r);class s{constructor(t,n){e(this),this.interfaceId=t,this.callbackMap={},this.data={},this.pendingData={},this.jcomm=new c(\"guidance_interface_target_\"+this.interfaceId,this.updateData,\"open\"),this.debouncedSendPendingData500=a()(this.sendPendingData,500),this.debouncedSendPendingData1000=a()(this.sendPendingData,1e3),n&&o()(n)}send(t,e){this.addPendingData(t,e),this.sendPendingData()}sendEvent(t){for(const e of Object.keys(t))this.addPendingData(e,t[e]);this.sendPendingData()}debouncedSendEvent500(t){for(const e of Object.keys(t))this.addPendingData(e,t[e]);this.debouncedSendPendingData500()}debouncedSend500(t,e){this.addPendingData(t,e),this.debouncedSendPendingData500()}debouncedSend1000(t,e){this.addPendingData(t,e),this.debouncedSendPendingData1000()}addPendingData(t,e){Array.isArray(t)||(t=[t]);for(const n in t)this.pendingData[t[n]]=e}updateData(t){t=JSON.parse(t.data);for(const e in t)this.data[e]=t[e];for(const e in t)e in this.callbackMap&&this.callbackMap[e](this.data[e])}subscribe(t,e){this.callbackMap[t]=e,o()((e=>this.callbackMap[t](this.data[t])))}sendPendingData(){this.jcomm.send_data(this.pendingData),this.pendingData={}}}class c{constructor(t,e,n=\"open\"){this._fire_callback=this._fire_callback.bind(this),this._register=this._register.bind(this),this.jcomm=void 0,this.callback=e,void 0!==window.Jupyter?\"register\"===n?Jupyter.notebook.kernel.comm_manager.register_target(t,this._register):(this.jcomm=Jupyter.notebook.kernel.comm_manager.new_comm(t),this.jcomm.on_msg(this._fire_callback)):void 0!==window._mgr&&(\"register\"===n?window._mgr.widgetManager.proxyKernel.registerCommTarget(t,this._register):(this.jcomm=window._mgr.widgetManager.proxyKernel.createComm(t),this.jcomm.open({},\"\"),this.jcomm.onMsg=this._fire_callback))}send_data(t){void 0!==this.jcomm?this.jcomm.send(t):console.error(\"Jupyter comm module not yet loaded! So we can't send the message.\")}_register(t,e){this.jcomm=t,this.jcomm.on_msg(this._fire_callback)}_fire_callback(t){this.callback(t.content.data)}}class d{constructor(t,n){e(this),this.id=t,this.comm=new s(t),this.comm.subscribe(\"append\",this.appendData),this.comm.subscribe(\"replace\",this.replaceData),this.comm.subscribe(\"event\",this.eventOccurred),this.element=document.getElementById(\"guidance-content-\"+t),this.stop_button=document.getElementById(\"guidance-stop-button-\"+t),this.stop_button.onclick=()=>this.comm.send(\"event\",\"stop\")}appendData(t){t&&(this.stop_button.style.display=\"inline-block\",this.element.innerHTML+=t)}replaceData(t){t&&(this.stop_button.style.display=\"inline-block\",this.element.innerHTML=t)}eventOccurred(t){\"complete\"===t&&(this.stop_button.style.display=\"none\")}}window._guidanceDisplay=function(t,e){return new d(t,e)}})()})();; window._guidanceDisplay(\"d65221b6-a4b0-493b-b53f-59c2c7f897ec\");</script>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"better(llm=gpt4, assistant1=chatgpt_answer['answer'], assistant2=right_answer_no_exp)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Now it prefers the wrong answer regardless of order!"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "guidance",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.16"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment