Skip to content

Instantly share code, notes, and snippets.

@RafayAK
Created March 4, 2016 07:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save RafayAK/7c589f51eb03d17b9f0f to your computer and use it in GitHub Desktop.
Save RafayAK/7c589f51eb03d17b9f0f to your computer and use it in GitHub Desktop.
FileSystem Crawler
package Crawler;
import java.awt.*;
import java.io.File;
import java.util.ArrayList;
import java.util.Scanner;
/**
* Created by Sanitarium on 3/4/2016.
*/
/**
 * Multi-threaded file-system crawler.
 *
 * <p>The calling thread walks a directory tree with {@link #processDirectory(String)} and
 * places every directory it finds on {@link #workQ}. Worker threads (see
 * {@link #createWorker()}) take directories off the queue, list their entries, print each
 * entry's path and record it in {@link #directories}.
 *
 * <p>Thread-safety: workers append to {@link #directories} under an internal lock. Other
 * threads should only read the list after joining the workers.
 */
public class fileCrawler {
    /** Root crawled by {@link #main(String[])} when no command-line argument is given. */
    private static final String DEFAULT_ROOT =
            "C:\\Users\\Sanitarium\\IdeaProjects\\FileCrawler\\src\\test\\Crawler\\TD";

    /** Number of worker threads started by {@link #main(String[])}. */
    private static final int WORKER_COUNT = 5;

    /** Queue of directories waiting to be listed by a worker. */
    public WorkQueue workQ;

    /** Paths of every entry the workers have listed so far. */
    public ArrayList<String> directories = new ArrayList<String>();

    // Not used anywhere in this class; kept so the class's package-visible members are unchanged.
    static int i = 0;

    /** Guards appends to {@link #directories}; ArrayList is not safe for concurrent writers. */
    private final Object directoriesLock = new Object();

    /** Drains the work queue, recording and printing the entries of each queued directory. */
    private class Worker implements Runnable {
        private final WorkQueue queue;

        public Worker(WorkQueue q) {
            queue = q;
        }

        // Everything on the queue was already verified as a listable directory by
        // processDirectory, so no exception handling is needed inside the loop.
        @Override
        public void run() {
            String name;
            // remove() returns null once the queue is finished and drained.
            while ((name = queue.remove()) != null) {
                String[] entries = new File(name).list();
                if (entries == null) {
                    // Directory vanished or became unreadable since it was queued.
                    continue;
                }
                // File.list() never includes "." or "..", so no filtering is required.
                for (String entry : entries) {
                    String fn = joinPath(name, entry);
                    synchronized (directoriesLock) {
                        directories.add(fn);
                    }
                    System.out.println(fn);
                }
            }
        }
    }

    /** Creates a crawler with an empty work queue. */
    public fileCrawler() {
        workQ = new WorkQueue();
    }

    /**
     * Creates a worker bound to this crawler's queue.
     *
     * @return a runnable intended to be run on its own thread
     */
    public Worker createWorker() {
        return new Worker(workQ);
    }

    /**
     * Joins a directory path and an entry name with the platform separator, without
     * doubling the separator when {@code parent} already ends with one.
     */
    private static String joinPath(String parent, String child) {
        if (parent.endsWith(File.separator)) {
            return parent + child;
        }
        return parent + File.separator + child;
    }

    /**
     * Recursively queues {@code dir} and every directory below it on {@link #workQ}.
     *
     * <p>Paths that are null, are not directories, or cannot be listed are skipped.
     *
     * @param dir path of the directory to crawl
     */
    public void processDirectory(String dir) {
        if (dir == null) {
            return;
        }
        String[] entries;
        try {
            File file = new File(dir);
            if (!file.isDirectory()) {
                return;
            }
            entries = file.list();
        } catch (SecurityException e) {
            // A security manager denied read access; report it and carry on with siblings.
            System.err.println("Skipping unreadable directory " + dir + ": " + e.getMessage());
            return;
        }
        if (entries == null) {
            // list() returns null on an I/O error; there is nothing to queue or recurse into.
            return;
        }
        workQ.add(dir);
        // File.list() never includes "." or "..", so every entry is a genuine child.
        for (String entry : entries) {
            processDirectory(joinPath(dir, entry));
        }
    }

    /**
     * Crawls a directory tree with {@value #WORKER_COUNT} workers, then reads a keyword from
     * standard input and prints every recorded path containing it.
     *
     * @param Args optional; {@code Args[0]} is the root directory to crawl. When absent the
     *             built-in default root is used.
     */
    public static void main(String Args[]) {
        String root = (Args != null && Args.length > 0) ? Args[0] : DEFAULT_ROOT;
        fileCrawler fc = new fileCrawler();

        // Start the workers first so they consume directories while the tree is being walked.
        ArrayList<Thread> workers = new ArrayList<Thread>(WORKER_COUNT);
        for (int n = 0; n < WORKER_COUNT; n++) {
            Thread t = new Thread(fc.createWorker());
            workers.add(t);
            t.start();
        }

        // Place each directory into the work queue.
        fc.processDirectory(root);
        // Indicate that there are no more directories to add.
        fc.workQ.finish();

        for (Thread worker : workers) {
            try {
                worker.join();
            } catch (InterruptedException e) {
                // Preserve the interrupt for whoever owns this thread and stop waiting.
                Thread.currentThread().interrupt();
                System.err.println("Interrupted while waiting for the crawl to finish");
                return;
            }
        }

        System.out.println("Enter keyword to list corresponding paths: ");
        Scanner sc = new Scanner(System.in);
        if (!sc.hasNextLine()) {
            // Standard input is closed or empty; there is no keyword to search for.
            return;
        }
        String key = sc.nextLine();
        System.out.println("******************************************\n");
        for (String path : fc.directories) {
            if (path.contains(key)) {
                System.out.println(path);
            }
        }
    }
}
package Crawler;
/**
* Created by Sanitarium on 3/4/2016.
*/
import java.util.*;
/**
 * Blocking FIFO queue of directory names shared between one producer and several workers.
 *
 * <p>All methods are {@code synchronized} on this instance, so the backing deque itself
 * does not need to be thread-safe.
 */
public class WorkQueue {
    /** Pending items, oldest first. */
    private final Deque<String> workQ;

    /** Set once the producer has announced that no more items will be added. */
    private boolean done;

    /** Creates an empty queue that is still accepting items. */
    public WorkQueue() {
        workQ = new ArrayDeque<String>();
        done = false;
    }

    /**
     * Appends an item and wakes any thread blocked in {@link #remove()}.
     *
     * @param s the item to enqueue; must not be null, because {@link #remove()} uses null
     *          to signal that the queue is exhausted
     * @throws NullPointerException if {@code s} is null
     */
    public synchronized void add(String s) {
        if (s == null) {
            throw new NullPointerException("s");
        }
        workQ.add(s);
        notifyAll();
    }

    /**
     * Removes and returns the oldest item, blocking while the queue is empty and
     * {@link #finish()} has not been called.
     *
     * @return the next item, or null when the queue is finished and drained, or when the
     *         calling thread is interrupted while waiting (its interrupt status is then
     *         left set)
     */
    public synchronized String remove() {
        while (!done && workQ.isEmpty()) {
            try {
                wait();
            } catch (InterruptedException e) {
                // Treat interruption as a request to stop; restore the flag for the caller.
                Thread.currentThread().interrupt();
                return null;
            }
        }
        // Either an item is available, or the queue is finished and poll() yields null.
        return workQ.poll();
    }

    /** Marks the queue as complete and releases every thread blocked in {@link #remove()}. */
    public synchronized void finish() {
        done = true;
        notifyAll();
    }
}
package Crawler;
import org.junit.Test;
import java.util.ArrayList;
import static org.junit.Assert.*;
/**
* Created by Sanitarium on 3/4/2016.
*/
/**
 * Integration test for {@link fileCrawler}: crawls a fixture directory on the local disk
 * with a single worker and checks the first recorded path.
 */
public class fileCrawlerTest {
    /**
     * Crawls the fixture directory and expects its first recorded entry to be
     * {@code Doc.txt}.
     *
     * @throws Exception if the test thread is interrupted while joining the worker
     */
    @Test
    public void testProcessDirectory() throws Exception {
        fileCrawler crawler = new fileCrawler();
        int workerCount = 1;
        ArrayList<Thread> workers = new ArrayList<Thread>(workerCount);
        for (int n = 0; n < workerCount; n++) {
            Thread worker = new Thread(crawler.createWorker());
            workers.add(worker);
            worker.start();
        }

        crawler.processDirectory("C:\\Users\\Sanitarium\\IdeaProjects\\FileCrawler\\src\\test\\Crawler\\TD");
        crawler.workQ.finish();

        // Let an interrupt fail the test instead of being silently discarded.
        for (Thread worker : workers) {
            worker.join();
        }

        // Fail with a readable message rather than an IndexOutOfBoundsException.
        assertFalse("crawler recorded no entries for the fixture directory",
                crawler.directories.isEmpty());
        String first = (String) crawler.directories.get(0);
        // JUnit's contract is assertEquals(expected, actual).
        assertEquals(
                "C:\\Users\\Sanitarium\\IdeaProjects\\FileCrawler\\src\\test\\Crawler\\TD\\Doc.txt",
                first);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment