Skip to content

Instantly share code, notes, and snippets.

@skaag
Created June 5, 2024 17:11
Show Gist options
  • Save skaag/8e4fc2dfeea26d7ee274384446f1107c to your computer and use it in GitHub Desktop.
Save skaag/8e4fc2dfeea26d7ee274384446f1107c to your computer and use it in GitHub Desktop.
Langflow Node that extracts data using RegEx
from typing import Optional, Union, List
import re
from langflow.custom import CustomComponent
from langflow.field_typing import Text
from langflow.schema import Record
class ExtractWithRegexComponent(CustomComponent):
    """Custom component that extracts one piece of text using a regular expression.

    The first match of ``regex_pattern`` in ``input_text`` is located with
    ``re.search``. If the pattern defines capture groups, the first group is
    the extracted value; otherwise the whole match is used. The result is
    returned either as plain text or wrapped in a ``Record``.
    """

    display_name = "Extract with Regex"
    description = "Extracts text from input based on a specified regular expression."
    field_config = {
        "input_text": {
            "display_name": "Input Text",
            "info": "The text from which to extract the pattern.",
        },
        "regex_pattern": {
            "display_name": "Regex Pattern",
            "info": "The regular expression pattern to use for extraction.",
        },
        "output_type": {
            "display_name": "Output Type",
            "info": "The type of output to return: 'text' or 'record'.",
            # NOTE(review): kept exactly as in the original. If the intent was
            # a drop-down of allowed values, confirm against the Langflow
            # field_config documentation which key provides that.
            "input_types": ["text", "record"],
        },
    }

    def build(
        self,
        input_text: Text,
        regex_pattern: Text,
        output_type: str = "text",
    ) -> Union[Text, Record]:
        """Extract the first regex match from ``input_text``.

        Args:
            input_text: Text to search. Must be non-empty.
            regex_pattern: Regular expression to search for. Must be non-empty.
            output_type: ``"record"`` wraps the result in a ``Record`` under
                the ``"extracted_text"`` key; any other value returns the
                plain text.

        Returns:
            The extracted text (or the literal ``"No match found."`` when the
            pattern does not match), as text or as a ``Record`` depending on
            ``output_type``.

        Raises:
            ValueError: If ``input_text`` or ``regex_pattern`` is empty.
            re.error: If ``regex_pattern`` is not a valid regular expression.
        """
        if not input_text or not regex_pattern:
            raise ValueError("Both 'input_text' and 'regex_pattern' must be provided and non-empty.")

        match = re.compile(regex_pattern).search(input_text)

        if match is None:
            extracted_text = "No match found."
        else:
            # Default to the whole match; prefer the first capture group only
            # when it actually took part in the match. An optional or
            # alternated group that did not participate yields None, which
            # must not leak out as the result. An empty-string capture is a
            # legitimate value, hence the explicit ``is not None`` test.
            extracted_text = match.group(0)
            if match.groups():
                first_group = match.group(1)
                if first_group is not None:
                    extracted_text = first_group

        self.status = extracted_text

        if output_type == "record":
            return Record(data={"extracted_text": extracted_text})
        return extracted_text
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment