Last active
October 23, 2019 18:03
-
-
Save greggirwin/0d6e3551420a7892f782b80a5fc44126 to your computer and use it in GitHub Desktop.
GEDCOM parser example (basic)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Red [ | |
references: http://user.it.uu.se/~andersa/gedcom/ch1.html#grammar | |
sample-files: [ | |
http://heiner-eichmann.de/gedcom/gedcom.htm | |
http://heiner-eichmann.de/gedcom/allged.ged | |
] | |
] | |
; Entry rule: a GEDCOM transmission is one or more lines.
gedcom_main: [some gedcom_line]

; One GEDCOM line:
;     level delim [xref_id delim] tag [delim line_value] terminator
; Each sub-rule leaves what it matched in an `=name` word; the paren after
; it traces that value as soon as the sub-rule has succeeded.
gedcom_line: [
    level               (print ['level =level])
    delim               (print 'delim)
    opt_xref_id         (print ['xref_id =xref_id])
    tag                 (print ['tag =tag])
    opt_line_value      (print ['line_value =line_value])
    ; Accept end-of-input in place of the line break, so a last line that
    ; has no trailing newline does not make the whole parse fail.
    [terminator | end]  (print 'terminator)
]
; Lexical rules of the GEDCOM line grammar (see the `references` URL in the
; file header), kept in alphabetical order. Rules containing `copy =name`
; store the matched text in the word `=name` for use by the caller.

; Letters and underscore.
alpha: charset [#"A" - #"Z" #"a" - #"z" #"_"]
alphanum: [alpha | digit]
; Any character allowed in a value; a literal "@" is written doubled ("@@").
any_char: [alpha | digit | otherchar | #"#" | space | "@@"]
delim: space
digit: charset [#"0" - #"9"]
; Escape sequence: "@#" text "@" followed by one character that is not "@".
escape: ["@#" escape_text #"@" non_at]
escape_text: [copy =escape_text some any_char]
level: [copy =level some digit]  ; (Do not use non-significant leading zeroes such as 02.)
; `pointer` and `escape` are tried before `any_char` so that "@...@" forms
; are recognised as a unit.
line_item: [pointer | escape | any_char]
line_value: [copy =line_value some line_item]
non_at: [alpha | digit | otherchar | #"#" | space]
; Optional value part of a line; `=line_value` is reset to none first so a
; line without a value does not report the previous line's value.
opt_line_value: [(=line_value: none) opt [delim line_value | delim]]
; Optional cross-reference id at the start of a line. `=xref_id` is assigned
; here, and only after both the pointer and its delimiter matched, so that
; pointers appearing inside a line value never overwrite it.
opt_xref_id: [(=xref_id: none) opt [pointer delim (=xref_id: =pointer_string)]]
; Any ASCII character except control characters (0x00 - 0x1F), alphanum,
; space ( ), number sign (#), at character (@), and the DEL character (0x7F).
otherchar: charset [
    #"^(21)" - #"^(22)"
    #"^(24)" - #"^(2F)"
    #"^(3A)" - #"^(3F)"
    #"^(5B)" - #"^(5E)"
    #"^(60)"             ; `
    #"^(7B)" - #"^(7E)"
    #"^(80)" - #"^(FF)"
]
; "@" id "@"; the id (without the "@" signs) is left in `=pointer_string`.
pointer: [#"@" pointer_string #"@"]
pointer_char: [non_at]
; One alphanum followed by ZERO or more pointer_chars, so one-character
; ids such as @A@ are accepted.
pointer_string: [copy =pointer_string [alphanum any pointer_char]]
tag: [copy =tag some alphanum]
terminator: [cr lf | lf cr | cr | lf]
;------------------------------------------------------------------------------- | |
; Small built-in GEDCOM transmission used to exercise the grammar: a header,
; three individuals, one family, a submitter and the trailer record.
; Every line, including the last one, ends with a newline.
sample: {0 HEAD
1 SOUR PAF
2 NAME Personal Ancestral File
2 VERS 5.0
1 DATE 30 NOV 2000
1 GEDC
2 VERS 5.5
2 FORM LINEAGE-LINKED
1 CHAR ANSEL
1 SUBM @U1@
0 @I1@ INDI
1 NAME John /Smith/
1 SEX M
1 FAMS @F1@
0 @I2@ INDI
1 NAME Elizabeth /Stansfield/
1 SEX F
1 FAMS @F1@
0 @I3@ INDI
1 NAME James /Smith/
1 SEX M
1 FAMC @F1@
0 @F1@ FAM
1 HUSB @I1@
1 WIFE @I2@
1 MARR
1 CHIL @I3@
0 @U1@ SUBM
1 NAME Submitter
0 TRLR
}
; Run the grammar over the built-in sample. `parse` returns true only when
; the whole input matched, so report the result instead of discarding it.
print ["sample matched:" parse sample gedcom_main]

; Run the grammar over the larger public test file. The download needs
; network access, so a failed read is trapped and reported rather than
; aborting the script.
allged-url: http://heiner-eichmann.de/gedcom/allged.ged
either error? allged: try [read allged-url] [
    print ["could not read" allged-url]
][
    print ["allged.ged matched:" parse allged gedcom_main]
]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment