Skip to content

Instantly share code, notes, and snippets.

@superalsrk
Last active March 2, 2023 08:17
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save superalsrk/d8a33c5ce56b2bac89ab to your computer and use it in GitHub Desktop.
Save superalsrk/d8a33c5ce56b2bac89ab to your computer and use it in GitHub Desktop.
PathFilter.java
// In the main job, make the following call to activate the path filter:
FileInputFormat.setInputPathFilter(job, TongSeqFilter.class);
package com.miaozhen.dataeye.input;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
/**
 * Hadoop {@link PathFilter} that keeps only files whose path contains a configurable substring.
 *
 * <p>The substring is read from the configuration key {@code org.stackbox.filePattern} and
 * defaults to {@code "campaign_"}. The match is performed against the full path string
 * ({@code path.toString()}), so a match inside a parent directory name also counts.
 * Paths for which {@code FileSystem.isFile} returns {@code false} are always accepted.
 *
 * <p>The filter must receive a non-null {@link Configuration} through
 * {@link #setConf(Configuration)} before {@link #accept(Path)} is called.
 */
public class TongSeqFilter extends Configured implements PathFilter {

    /** Configuration key holding the substring a file path must contain. */
    private static final String PATTERN_KEY = "org.stackbox.filePattern";

    /** Substring used when the configuration key is not set. */
    private static final String DEFAULT_PATTERN = "campaign_";

    /** Substring to look for; stays {@code null} until a non-null configuration is supplied. */
    private String filePattern;

    /**
     * Decides whether {@code path} should be kept.
     *
     * @param path the candidate path
     * @return {@code true} if {@code path} is not a regular file, or if it is a regular file
     *     whose path string contains the configured substring; {@code false} otherwise, and
     *     also when the file system lookup fails with an {@link IOException}
     * @throws IllegalStateException if no configuration has been supplied yet
     */
    @Override
    public boolean accept(Path path) {
        Configuration conf = getConf();
        if (conf == null || filePattern == null) {
            // Fail with a clear message instead of an anonymous NullPointerException.
            throw new IllegalStateException(
                    "TongSeqFilter.accept called before setConf supplied a Configuration");
        }
        try {
            // Resolve the file system from the path itself rather than using the default
            // file system, so paths on a non-default file system can be checked too.
            FileSystem fs = path.getFileSystem(conf);
            if (!fs.isFile(path)) {
                return true;
            }
            return path.toString().contains(filePattern);
        } catch (IOException ex) {
            // Preserved behavior: a path whose status cannot be determined is rejected.
            ex.printStackTrace();
            return false;
        }
    }

    /**
     * Stores the configuration and reads the substring to match from it.
     *
     * <p>No file system access happens here, so this method cannot leave the filter
     * half-initialized. When {@code conf} is {@code null} the previously read substring
     * (if any) is left unchanged.
     *
     * @param conf the configuration to use; may be {@code null}
     */
    @Override
    public void setConf(Configuration conf) {
        // Delegate to Configured so that getConf() returns the supplied configuration.
        super.setConf(conf);
        if (conf != null) {
            filePattern = conf.get(PATTERN_KEY, DEFAULT_PATTERN);
        }
        System.out.println("Pattern is " + filePattern);
        System.out.println("===========>");
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment