Skip to content

Instantly share code, notes, and snippets.

@eidosam
Created January 23, 2023 12:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save eidosam/4e4563d4561fd151660eb2f7c5ff4de4 to your computer and use it in GitHub Desktop.
Save eidosam/4e4563d4561fd151660eb2f7c5ff4de4 to your computer and use it in GitHub Desktop.
BigQuery RegExp functions in Spark
https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#regexp_contains
https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#regexp_extract
https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#regexp_replace
package re2_udfs;
import org.apache.spark.sql.api.java.UDF2;
import com.google.re2j.Pattern;
public class RegexpContains implements UDF2<String, String, Boolean> {
private static final long serialVersionUID = -4480474501284793639L;
public Boolean call(String input, String pattern) throws Exception {
if (input == null) {
return null;
}
return Pattern.compile(pattern).matcher(input).find();
}
}
package re2_udfs;
import org.apache.spark.sql.api.java.UDF2;
import com.google.re2j.Matcher;
import com.google.re2j.Pattern;
public class RegexpExtract implements UDF2<String, String, String> {
private static final long serialVersionUID = 1L;
public String call(String input, String pattern) throws Exception {
if (input == null) {
return null;
}
Matcher m = Pattern.compile(pattern).matcher(input);
if (m.find()) {
return m.group(1);
} else {
return null;
}
}
}
package re2_udfs;
import org.apache.spark.sql.api.java.UDF3;
import com.google.re2j.Pattern;
public class RegexpReplace implements UDF3<String, String, String, String> {
private static final long serialVersionUID = -4978515450337236613L;
public String call(String input, String pattern, String replacement) throws Exception {
if (input == null) {
return null;
}
return Pattern.compile(pattern).matcher(input).replaceAll(replacement);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment