Skip to content

Instantly share code, notes, and snippets.

@horosin
Last active March 21, 2024 16:06
Show Gist options
  • Star 13 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Save horosin/5351ae4dc3eebbf181f9db212f5d3ebc to your computer and use it in GitHub Desktop.
Extracting and Generating JSON Data with GPTs, LangChain, and Node.js
import { z } from "zod";
import { OpenAI } from "langchain/llms/openai";
import { PromptTemplate } from "langchain/prompts";
import {
StructuredOutputParser,
OutputFixingParser,
} from "langchain/output_parsers";
import * as dotenv from "dotenv";
dotenv.config();
/**
 * Generate a structured, schema-validated "person" record with an LLM.
 *
 * Builds a Zod schema, turns it into format instructions for the prompt,
 * calls the model, and parses the response. If parsing fails, a second
 * deterministic (temperature 0) model is asked to repair the output via
 * OutputFixingParser.
 *
 * @returns {Promise<void>} logs the prompt, raw response, and parsed object.
 */
export const run = async () => {
  // Schema of the object we want the model to emit; each .describe() text
  // is sent to the model as part of the format instructions.
  const parser = StructuredOutputParser.fromZodSchema(
    z.object({
      name: z.string().describe("Human name"),
      surname: z.string().describe("Human surname"),
      age: z.number().describe("Human age"),
      appearance: z.string().describe("Human appearance description"),
      // FIX: "secription" -> "description" (the typo was sent to the model).
      shortBio: z.string().describe("Short bio description"),
      university: z.string().optional().describe("University name if attended"),
      gender: z.string().describe("Gender of the human"),
      interests: z
        .array(z.string())
        .describe("json array of strings human interests"),
    })
  );

  const formatInstructions = parser.getFormatInstructions();

  const prompt = new PromptTemplate({
    template:
      "Generate details of a hypothetical person.\n{format_instructions}\nPerson description: {inputText}",
    inputVariables: ["inputText"],
    partialVariables: { format_instructions: formatInstructions },
  });

  // FIX: LangChain's OpenAI LLM wrapper takes `modelName`, not `model`;
  // the original option was silently ignored and the default model used.
  const model = new OpenAI({ temperature: 0.5, modelName: "gpt-3.5-turbo" });

  const input = await prompt.format({
    inputText: "A man, living in Poland.",
  });
  const response = await model.call(input);

  console.log(input);
  console.log(response);

  try {
    console.log(await parser.parse(response));
  } catch (e) {
    console.log("Failed to parse bad output: ", e);
    // Ask a deterministic model (temperature 0) to repair malformed JSON.
    const fixParser = OutputFixingParser.fromLLM(
      new OpenAI({ temperature: 0, modelName: "gpt-3.5-turbo" }),
      parser
    );
    const output = await fixParser.parse(response);
    console.log("Fixed output: ", output);
  }
};

// FIX: don't leave the returned promise floating — surface rejections.
run().catch((e) => console.error(e));
import { PDFLoader } from "langchain/document_loaders/fs/pdf";
import { z } from "zod";
import { OpenAI } from "langchain/llms/openai";
import { PromptTemplate } from "langchain/prompts";
import {
StructuredOutputParser,
OutputFixingParser,
} from "langchain/output_parsers";
import * as dotenv from "dotenv";
dotenv.config();
// Extract a structured person record from a PDF (./elon.pdf):
// load the document text, prompt the model with schema-derived format
// instructions, and parse the response — falling back to an
// OutputFixingParser repair pass on malformed output.
const loader = new PDFLoader("./elon.pdf");
const docs = await loader.load();

// Schema of the object we want extracted; each .describe() text is sent
// to the model as part of the format instructions.
const parser = StructuredOutputParser.fromZodSchema(
  z.object({
    name: z.string().describe("Human name"),
    surname: z.string().describe("Human surname"),
    age: z.number().describe("Human age"),
    appearance: z.string().describe("Human appearance description"),
    // FIX: "secription" -> "description" (the typo was sent to the model).
    shortBio: z.string().describe("Short bio description"),
    university: z.string().optional().describe("University name if attended"),
    gender: z.string().describe("Gender of the human"),
    interests: z
      .array(z.string())
      .describe("json array of strings human interests"),
  })
);

const formatInstructions = parser.getFormatInstructions();

const prompt = new PromptTemplate({
  template:
    "Extract information from the person description.\n{format_instructions}\nThe response should be presented in a markdown JSON codeblock.\nPerson description: {inputText}",
  inputVariables: ["inputText"],
  partialVariables: { format_instructions: formatInstructions },
});

// FIX: LangChain's OpenAI LLM wrapper takes `modelName`, not `model`;
// the original option was silently ignored and the default model used.
// maxTokens is raised because PDF text plus JSON output can be long.
const model = new OpenAI({
  temperature: 0.5,
  modelName: "gpt-3.5-turbo",
  maxTokens: 2000,
});

const input = await prompt.format({
  inputText: docs[0].pageContent,
});

// FIX: the model call was missing entirely — `response` was referenced
// below but never defined, so the script crashed with a ReferenceError.
const response = await model.call(input);

try {
  const output = await parser.parse(response);
  console.log(output);
} catch (e) {
  console.log("Failed to parse bad output: ", e);
  // Ask a deterministic model (temperature 0) to repair malformed JSON.
  const fixParser = OutputFixingParser.fromLLM(
    new OpenAI({ temperature: 0, modelName: "gpt-3.5-turbo", maxTokens: 2000 }),
    parser
  );
  const output = await fixParser.parse(response);
  console.log("Fixed output: ", output);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment