Cloudflare Workers AI proxy example
// This is an example endpoint built for Cloudflare Workers using the Hono framework.
// It uses Workers AI to run Cloudflare's hosted Llama model, but could easily be
// tweaked to consume the OpenAI SDK directly. You could also skip the SDK and use
// the `fetch` API directly; you'd just have to set the appropriate HTTP headers and
// query params (API key, etc.). See the second example below for a simpler case.
// References:
// - https://developers.cloudflare.com/workers/runtime-apis/streams/
// - https://developers.cloudflare.com/workers/platform/pricing/
// - Workers AI usage within a worker: https://developers.cloudflare.com/workers-ai/platform/bindings/
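// The `AI` binding used below (`c.env.AI`) comes from wrangler.toml. A minimal
// sketch of the relevant config (the binding name `AI` is just a convention):
//
//   [ai]
//   binding = "AI"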
app.get("/stream", async (c) => {
const ai = new Ai(c.env.AI);
const question = c.req.query("query");
if (!question) {
return new Response(null, {
status: 400,
statusText: "Must provide question param",
});
}
  const systemPrompt = `You are a helpful assistant.`;
  const stream = await ai.run("@cf/meta/llama-2-7b-chat-int8", {
    messages: [
      { role: "system", content: systemPrompt },
      { role: "user", content: question },
    ],
    stream: true,
  });
  // Note that we aren't processing anything from the response stream directly;
  // we instead pipe it into a new `Response` that is returned to the consumer.
  // The time the worker spends piping the response is not billable under the
  // "standard" (modern) billing model. :) A request like this will easily come in
  // under 10ms of billable CPU time (usually around 2-3ms in my experience), and
  // thus fits in the Cloudflare Workers free tier as long as you have fewer than
  // 100,000 requests per day.
  return new Response(stream, {
    headers: {
      "content-type": "text/event-stream",
    },
  });
});
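// A minimal client-side sketch for consuming the endpoint above from a browser.
// Everything here is illustrative: it assumes the worker is mounted at /stream,
// and that Workers AI emits standard SSE lines of the form
// `data: {"response":"..."}` terminated by `data: [DONE]`; check the Workers AI
// docs for the exact event format. (It also assumes each chunk contains whole
// SSE lines; a production parser would buffer across chunk boundaries.)
async function askQuestion(question) {
  const res = await fetch(`/stream?query=${encodeURIComponent(question)}`);
  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let answer = "";
  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    // Decode the chunk and pull the token text out of each SSE `data:` line.
    for (const line of decoder.decode(value, { stream: true }).split("\n")) {
      if (!line.startsWith("data:")) continue;
      const payload = line.slice("data:".length).trim();
      if (payload === "[DONE]") continue;
      answer += JSON.parse(payload).response ?? "";
    }
  }
  return answer;
}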
app.get("/stream2", () => {
// This returns the response as-is, all the response headers, body, etc. from the origin
return fetch("https://someapi.com/foo", {
headers: {
Authentication: `Bearer ${token}`,
},
});
});
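// Sketch of the "skip the SDK" variant mentioned at the top: call the Workers AI
// REST endpoint with plain `fetch` and pipe the response straight through.
// ACCOUNT_ID and CF_API_TOKEN are hypothetical worker vars/secrets, and the REST
// path below is what the Workers AI docs describe at the time of writing; verify
// both against the current docs before relying on this.
app.get("/stream3", async (c) => {
  const question = c.req.query("query");
  const res = await fetch(
    `https://api.cloudflare.com/client/v4/accounts/${c.env.ACCOUNT_ID}/ai/run/@cf/meta/llama-2-7b-chat-int8`,
    {
      method: "POST",
      headers: {
        Authorization: `Bearer ${c.env.CF_API_TOKEN}`,
        "content-type": "application/json",
      },
      body: JSON.stringify({
        messages: [{ role: "user", content: question }],
        stream: true,
      }),
    }
  );
  // As above, the worker only pipes bytes through; it never buffers the body.
  return new Response(res.body, {
    headers: { "content-type": "text/event-stream" },
  });
});

export default app;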