Skip to content

Instantly share code, notes, and snippets.

@vsizov
Last active December 15, 2015 14:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vsizov/5271991 to your computer and use it in GitHub Desktop.
Save vsizov/5271991 to your computer and use it in GitHub Desktop.
duke config
<duke>
<schema>
  <!--
    Global settings for this schema.
    NOTE(review): presumably <threshold> is the probability above which a pair
    is reported as a match and <path> is where the index is kept; confirm
    against the Duke configuration documentation.
  -->
  <threshold>0.8</threshold>
  <path>temp</path>

  <!-- Record identifier: declared with type="id" and given no comparator. -->
  <property type="id">
    <name>ID</name>
  </property>

  <!--
    Name fields: exact comparison with low 0.2 / high 0.9.
    NOTE(review): <low> and <high> are understood to be the probabilities used
    when the two values differ / agree; confirm against the Duke docs.
  -->
  <property>
    <name>FIRST_NAME</name>
    <comparator>no.priv.garshol.duke.comparators.ExactComparator</comparator>
    <low>0.2</low>
    <high>0.9</high>
  </property>
  <property>
    <name>LAST_NAME</name>
    <comparator>no.priv.garshol.duke.comparators.ExactComparator</comparator>
    <low>0.2</low>
    <high>0.9</high>
  </property>

  <!-- Location fields: exact comparison with low 0.4 / high 0.8. -->
  <property>
    <name>ZIPCODE</name>
    <comparator>no.priv.garshol.duke.comparators.ExactComparator</comparator>
    <low>0.4</low>
    <high>0.8</high>
  </property>
  <property>
    <name>CITY</name>
    <comparator>no.priv.garshol.duke.comparators.ExactComparator</comparator>
    <low>0.4</low>
    <high>0.8</high>
  </property>
  <property>
    <name>STATE</name>
    <comparator>no.priv.garshol.duke.comparators.ExactComparator</comparator>
    <low>0.4</low>
    <high>0.8</high>
  </property>

  <!-- Street address: the only property using WeightedLevenshtein instead of exact comparison. -->
  <property>
    <name>ADDRESS</name>
    <comparator>no.priv.garshol.duke.comparators.WeightedLevenshtein</comparator>
    <low>0.4</low>
    <high>0.8</high>
  </property>

  <!-- Age: exact comparison with low 0.4 / high 0.8. -->
  <property>
    <name>AGE</name>
    <comparator>no.priv.garshol.duke.comparators.ExactComparator</comparator>
    <low>0.4</low>
    <high>0.8</high>
  </property>
</schema>
<group>
  <!-- First data source: rows of the `users` table in the duke_test MySQL database. -->
  <jdbc>
    <param name="driver-class" value="com.mysql.jdbc.Driver"/>
    <param name="connection-string" value="jdbc:mysql://localhost/duke_test"/>
    <!-- NOTE(review): root credentials are stored here in plain text; consider a dedicated read-only account. -->
    <param name="user-name" value="root"/>
    <param name="password" value="1"/>
    <!--
      Derived columns:
        address : address1 and address2 joined with ", " (CONCAT_WS).
        age     : whole years elapsed since birthday. TIMESTAMPDIFF is used
                  rather than subtracting calendar years, which over-counts by
                  one until this year's birthday has passed and would make the
                  exact AGE comparison fail for those rows.
    -->
    <param name="query" value="
      SELECT *,
             CONCAT_WS(', ', address1, address2) address,
             TIMESTAMPDIFF(YEAR, birthday, CURDATE()) AS age
      FROM users
    "/>

    <!-- Result-set column to schema property mappings, each with the cleaner applied to its value. -->
    <column name="id" property="ID"/>
    <column name="first_name"
            property="FIRST_NAME"
            cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"/>
    <column name="last_name"
            property="LAST_NAME"
            cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"/>
    <column name="zipcode"
            property="ZIPCODE"
            cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"/>
    <column name="city"
            property="CITY"
            cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"/>
    <column name="statecode"
            property="STATE"
            cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"/>
    <column name="address"
            property="ADDRESS"
            cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"/>
    <column name="age"
            property="AGE"
            cleaner="no.priv.garshol.duke.cleaners.DigitsOnlyCleaner"/>
    <!-- The original listing had a literal "..." placeholder here (possibly elided mappings); kept as a comment so no stray text node remains in the element. -->
  </jdbc>
</group>
<group>
  <!-- Second data source: rows of the `family_watchdog_offender` table in the duke_test MySQL database. -->
  <jdbc>
    <param name="driver-class" value="com.mysql.jdbc.Driver"/>
    <param name="connection-string" value="jdbc:mysql://localhost/duke_test"/>
    <!-- NOTE(review): root credentials are stored here in plain text; consider a dedicated read-only account. -->
    <param name="user-name" value="root"/>
    <param name="password" value="1"/>
    <!--
      The zero date '0000-00-00' in dob is turned into NULL by the query.
      dob is selected but no <column> below maps it to a schema property.
    -->
    <param name="query" value="
      select offender_id, firstname, lastname, zipcode, city, state,
             if(dob='0000-00-00', null, dob) as dob, street1, age
      FROM family_watchdog_offender
    "/>

    <!-- Result-set column to schema property mappings, each with the cleaner applied to its value. -->
    <column name="offender_id" property="ID"/>
    <column name="firstname"
            property="FIRST_NAME"
            cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"/>
    <column name="lastname"
            property="LAST_NAME"
            cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"/>
    <column name="zipcode"
            property="ZIPCODE"
            cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"/>
    <column name="city"
            property="CITY"
            cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"/>
    <column name="state"
            property="STATE"
            cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"/>
    <column name="street1"
            property="ADDRESS"
            cleaner="no.priv.garshol.duke.cleaners.LowerCaseNormalizeCleaner"/>
    <column name="age"
            property="AGE"
            cleaner="no.priv.garshol.duke.cleaners.DigitsOnlyCleaner"/>
    <!-- The original listing had a literal "..." placeholder here (possibly elided mappings); kept as a comment so no stray text node remains in the element. -->
  </jdbc>
</group>
</duke>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment