Skip to content

Instantly share code, notes, and snippets.

@code-vagabond
Created March 10, 2017 14:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save code-vagabond/ecd7df18952b9b85ee3dbd1b5f0878df to your computer and use it in GitHub Desktop.
Save code-vagabond/ecd7df18952b9b85ee3dbd1b5f0878df to your computer and use it in GitHub Desktop.
Mainzelliste configuration for OSSE
# Properties for mainzelliste
#
# Configuration template for running OSSE with identifying data
#
# General format: Key-value pairs, separated by spaces, tabs, "=", ":".
# Lines starting with "#" are comments. Comments appended to a line are not possible!
# See Javadoc for java.util.Properties#load(Reader) for details.
# Property names may contain letters, numbers, underscores and dots. Dots define a hierarchical
# structure on the property names.
# For a comprehensive documentation of configuration parameters see the configuration manual
# ("Konfigurationshandbuch").
# debug mode: If set to true, token handling is disabled (i.e. no tokens are necessary).
debug = false
# Name of this mainzelliste distribution instance.
dist = Mainzelliste
# Database setup
# OSSE uses PostgreSQL
#
# db.driver: JDBC driver class name.
# db.url: JDBC connection URL (here: database "mainzelliste" on localhost, port 5432).
db.driver = org.postgresql.Driver
db.url = jdbc:postgresql://localhost:5432/mainzelliste
# db.username / db.password: credentials used for the database connection.
# NOTE(review): both are set to the same template-looking value; presumably they
# should be replaced per installation -- confirm before deploying.
db.username = mainzelliste
db.password = mainzelliste
# Logging
# Default log location for OSSE installation
log.filename = /var/log/mainzelliste/mainzelliste.log
log.level = INFO
# Field definitions
#
# Every input field must be defined here with field.{fieldname}.type = {fieldtype},
# where fieldname is a unique identifier and fieldtype the name of a subclass of Field.
# Input data from the field will be stored in objects of that class.
#
# Every field defined here must either be contained in the input form
# WEB-INF/jsp/patientFormElements.jsp (with corresponding "name"-Tag) or
# be provided in the createPatient Token
# In order to deactivate fields temporarily, it is sufficient to comment out the field definition.
field.vorname.type = PlainTextField
field.nachname.type = PlainTextField
field.geburtsname.type = PlainTextField
field.geburtstag.type = IntegerField
field.geburtsmonat.type = IntegerField
field.geburtsjahr.type = IntegerField
field.plz.type = PlainTextField
field.ort.type = PlainTextField
field.location.type = PlainTextField
# Server-side validation
# validator.field.{fieldname}.required: Whether a field must not be empty
validator.field.vorname.required = true
validator.field.nachname.required = true
validator.field.geburtstag.required = true
validator.field.geburtsmonat.required = true
validator.field.geburtsjahr.required = true
# Date validation:
# validator.date.{i}.fields defines a list of fields, separated by commas.
# The date string is formed by concatenation of these fields.
# validator.date.{i}.format: A format definition (see java.text.SimpleDateFormat) by
# which the date string is interpreted. Validation checks if the parsed date is valid.
# Set fields which form the date and the format (when concatenating the fields).
# Multiple dates can be specified by supplying multiple definitions with consecutive
# numbers as {i}
validator.date.0.fields = geburtstag, geburtsmonat, geburtsjahr
validator.date.0.format = ddMMyyyy
# Exchange groups
#
# An exchange group denotes a set of fields whose values are considered interchangeable.
# The matcher (currently only EpilinkMatcher) compares the fields in each exchange group to
# all of its permutations and uses the best matching combination for final decision.
#
# An exchange group is defined by exchangeGroup.{id} = {fieldlist}, where id is a sequential number
# and fieldlist a comma-separated list of field names
exchangeGroup.0 = vorname, nachname, geburtsname
# Field transformations
#
# Transformations for a field {fieldname} are defined by
# field.{fieldname}.transformers = {transformerlist}, where
# transformerlist is a comma-separated list of names of subclasses of FieldTransformer.
#
# The first transformer in the list is applied to the input field as defined in the field definition;
# i.e. the type parameter IN of the first transformer must match the class of the field.
# Every subsequent transformer is applied to the output of its predecessor; i.e. its type parameter IN
# must match type parameter OUT of the preceding transformer.
field.vorname.transformers = StringNormalizer, FirstNameDecomposer
field.nachname.transformers = StringNormalizer, GermanLastNameDecomposer
field.geburtsname.transformers = StringNormalizer, GermanLastNameDecomposer
# Field comparators
#
# field.{fieldname}.comparator = {comparator} assigns the comparison method that is
# used for field {fieldname}. In this template the name fields and "ort" use
# NGramComparator, while the date parts and "plz" use BinaryFieldComparator
# (presumably string similarity vs. exact equality -- confirm in the configuration manual).
#
# NOTE(review): field.location has a type definition in this file but no comparator
# and no matcher.epilink.location.* entries; presumably it is stored without taking
# part in record linkage -- confirm that this is intended.
field.vorname.comparator = NGramComparator
field.nachname.comparator = NGramComparator
field.geburtsname.comparator = NGramComparator
field.geburtstag.comparator = BinaryFieldComparator
field.geburtsmonat.comparator = BinaryFieldComparator
field.geburtsjahr.comparator = BinaryFieldComparator
field.plz.comparator = BinaryFieldComparator
field.ort.comparator = NGramComparator
# Matcher definition
# Property matcher defines the class name of the matcher to use, which must be a subclass
# of Matcher. Currently only EpilinkMatcher is supported.
matcher = EpilinkMatcher
# Config for Epilink Matcher
#
# matcher.epilink.{fieldname}.frequency: Defines the assumed mean frequency of values
# for field {fieldname}. This corresponds to the u-probability in the Fellegi-Sunter-Model
# and can be estimated by the reciprocal of the number of distinct values (i.e. the
# frequency of "month" is 1 / 12).
matcher.epilink.vorname.frequency = 0.000235
matcher.epilink.nachname.frequency = 0.0000271
matcher.epilink.geburtsname.frequency = 0.0000271
# 1 / 30 (approximate average number of days in a month)
matcher.epilink.geburtstag.frequency = 0.0333
# 1 / 12
matcher.epilink.geburtsmonat.frequency = 0.0833
# example value: birth years for an age range of 35 years (40 - 75).
matcher.epilink.geburtsjahr.frequency = 0.0286
matcher.epilink.plz.frequency = 0.01
matcher.epilink.ort.frequency = 0.01
# matcher.epilink.{fieldname}.errorRate defines the assumed error rate for
# field {fieldname} (1 - m-probability)
#
# The supplied numbers stem from the evaluation of a German cancer registry
matcher.epilink.vorname.errorRate = 0.01
matcher.epilink.nachname.errorRate = 0.008
matcher.epilink.geburtsname.errorRate = 0.008
matcher.epilink.geburtstag.errorRate = 0.005
matcher.epilink.geburtsmonat.errorRate = 0.002
matcher.epilink.geburtsjahr.errorRate = 0.004
matcher.epilink.plz.errorRate = 0.04
matcher.epilink.ort.errorRate = 0.04
# matcher.epilink.threshold_match: Defines the minimum weight (in the interval [0,1])
# for which a pair of records is considered a definite match.
#
# matcher.epilink.threshold_non_match: Defines the weight (in the interval [0,1])
# below which a pair of records is considered a definite non-match. Must be less than or equal
# to matcher.epilink.threshold_match.
#
# Record pairs with a weight w with threshold_non_match <= w < threshold_match are
# considered possible matches and lead to a tentative PID.
matcher.epilink.threshold_match = 0.95
matcher.epilink.threshold_non_match = 0.7
# ID-Generators
#
# idgenerators: A comma-separated list of identifiers of ID-Generators, corresponding to ID types.
# For every generator:
# idgenerator.{identifier}: Class which implements the ID generator.
# additional parameters depend on the ID generator used.
#
# Additional params for PIDGenerator (k1, k2, k3, rndwidth) are defined here.
idgenerators = pid, psn
idgenerator.pid = PIDGenerator
idgenerator.pid.k1 = 1
idgenerator.pid.k2 = 2
idgenerator.pid.k3 = 3
# rndwidth is optional (default 0)
idgenerator.pid.rndwidth = 0
idgenerator.psn = SimpleIDGenerator
# Output of result
# Whether to show a result page
result.show = false
# Whether to include Idat in the result page.
result.printIdat = false
# xDAT servers
#
# Known Permissions / Token Types:
#
# - showSessionIds: see own sessions via GET /sessions
# - createSession: create session via POST /sessions
#
# - createToken: create tokens -- in addition, permission "tt_x" is required, where x is one of these TOKEN TYPES:
# - tt_addPatient: allows generating one (1) ID. The token is invalidated afterwards even if an existing ID was returned.
# - tt_readPatients: allows retrieving IDAT for a patient, needed for Temp-ID-Handling (still in development)
# servers.{n}.apiKey: A passphrase used for authentication
# NOTE(review): this key is stored in plain text in a shared template; presumably it
# should be replaced by a private, installation-specific secret -- confirm before deploying.
servers.0.apiKey = mdatborg
# servers.{n}.permissions: semicolon-separated list of permissions / token types
# granted to the server that authenticates with the key above.
# NOTE(review): deletePatient, readConfiguration and tt_editPatient are granted here
# but are not described in the list of known permissions in this file -- see the
# configuration manual for their meaning.
servers.0.permissions = createSession;showSessionIds;deletePatient;createToken;readConfiguration;tt_addPatient;tt_readPatients;tt_editPatient
# Accepted origin addresses
# servers.{n}.allowedRemoteAdresses: semicolon-separated list of address ranges in
# address/prefix-length notation. The spelling "Adresses" is part of the key name as
# used here; do not correct it without checking what the application expects.
# NOTE(review): with a prefix length of 1 only the first bit is fixed, so these two
# entries appear to cover 0.0.0.0-127.255.255.255 and 128.0.0.0-255.255.255.255,
# i.e. every IPv4 address. Presumably this should be narrowed to the addresses of the
# actual client servers in production -- confirm.
servers.0.allowedRemoteAdresses = 0.255.255.255/1;255.255.255.255/1
servers.allowedOrigins =
# Allowed Format of callback
callback.allowedFormat = http[s]?://.*
# Session timeout
# A session will be deleted if not accessed for this amount of time, specified in minutes
# Default value if not specified: 10
sessionTimeout = 30
# Contact information
# Contact information for users, which will be displayed in the footer of HTML forms.
operator.contact =
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment