igstan/execution.log

## execution.log
$ ./to-csv.sed records.xml
111111H2,111AA2026
111111N1,111AA2026
111111Q1,111AA2026
111111U1,111AA2026
111111Z1,111AA2026

## records.xml
<record name="111111H2" />
<items>
  <field name="Electronic Identifier" value="1"/>
  <field name="Symbol" value="111AA2026"/>
  <field name="Full Symbol" value="111AA202622MARFUT"/>
  <field name="System Identifier" value="1"/>
  <field name="System Identifier Description" value="Description"/>
</items>
<record name="111111N1" />
<items>
  <field name="Electronic Identifier" value="2"/>
  <field name="Symbol" value="111AA2026"/>
  <field name="Full Symbol" value="111AA202621JULFUT"/>
  <field name="System Identifier" value="2"/>
  <field name="System Identifier Description" value="Description"/>
</items>
<record name="111111Q1" />
<items>
  <field name="Electronic Identifier" value="3"/>
  <field name="Symbol" value="111AA2026"/>
  <field name="Full Symbol" value="111AA202621AUGFUT"/>
  <field name="System Identifier" value="3"/>
  <field name="System Identifier Description" value="Description"/>
</items>
<record name="111111U1" />
<items>
  <field name="Electronic Identifier" value="4"/>
  <field name="Symbol" value="111AA2026"/>
  <field name="Full Symbol" value="111AA202621SEPFUT"/>
  <field name="System Identifier" value="4"/>
  <field name="System Identifier Description" value="Description"/>
</items>
<record name="111111Z1" />
<items>
  <field name="Electronic Identifier" value="5"/>
  <field name="Symbol" value="111AA2026"/>
  <field name="Full Symbol" value="111AA202621DECFUT"/>
  <field name="System Identifier" value="5"/>
  <field name="System Identifier Description" value="Description"/>
</items>

## to-csv.sed
#!/usr/bin/env sed -nEf

# Converts record/field XML (see records.xml) into CSV lines of the form:
#
#   <record name>,<value of the field on the 3rd line after the record>
#
# -n flag to disable printing of unhandled lines
# -E flag to enable extended regular expressions
# -f flag to tell sed that the contents of this file are commands to execute
#
# NOTE(review): this shebang relies on the system splitting "sed -nEf" into
# separate arguments for env. Linux is believed to pass everything after the
# interpreter path as one single argument, in which case `env -S` would be
# needed -- confirm on the target platform.

#
# sed uses two internal "variables" called the pattern space and the hold
# space. Each line read from input is stored inside the pattern space and
# all [s]ubstitution commands are performed on the pattern space. So this
# pattern space is effectively rewritten whenever sed reads a new line of
# input. If we want to remember things between read lines, we need to use
# the hold space. There are a few commands that allow us to move things
# between the two spaces, but in what follows, think of the two spaces as
# two variables with different lifetimes:
#
#   1. pattern space: remembers things during the processing of a line
#   2. hold space: remembers things even as we move across lines
#

# For all lines matching "<record" do all of the brace-enclosed actions.
/<record/ {
  # The matched line is now stored inside the pattern space.

  # Now, replace the read line with just the value of the name attribute,
  # so the pattern space contains just that attribute value. Makes use of
  # the \1 capturing group reference.
  s/<record name="([^"]+)" \/>/\1/

  # Set the value of the hold space to the value of the pattern space, i.e.,
  # the attribute value, because we need to read a few lines before doing
  # additional processing on it.
  h

  # Read the next three lines after the matched line. Each "n" command will
  # set the contents of the pattern space to the contents of the read line.
  # Because of the -n flag, the lines being replaced are not printed.
  n
  n
  n

  # The pattern space now contains the 3rd line after our initial "<record".
  # In records.xml that is the second <field> element, the "Symbol" one; the
  # script relies purely on this line position, not on the field's name.

  # Keep only the attribute value: \1 is again a capturing group reference.
  s/^ *<field name="[^"]+" value="([^"]+)"\/>/\1/

  # Append a newline and then the pattern space to the hold space. This ensures
  # the order of the string in the hold space is: <record>\n<field>
  H

  # The hold space now contains the name of the "record" tag, a newline and
  # the value of the "field" element found on the 3rd line, so we can copy it
  # back from there to the pattern space in order to perform a final
  # substitution and print the result.
  g

  # Replace the newline separating the two values with a comma and [p]rint
  # the result after substitution.
  s/\n/,/p
}
	$ ./to-csv.sed records.xml
	111111H2,111AA2026
	111111N1,111AA2026
	111111Q1,111AA2026
	111111U1,111AA2026
	111111Z1,111AA2026
	<record name="111111H2" />
	<items>
	<field name="Electronic Identifier" value="1"/>
	<field name="Symbol" value="111AA2026"/>
	<field name="Full Symbol" value="111AA202622MARFUT"/>
	<field name="System Identifier" value="1"/>
	<field name="System Identifier Description" value="Description"/>
	</items>
	<record name="111111N1" />
	<items>
	<field name="Electronic Identifier" value="2"/>
	<field name="Symbol" value="111AA2026"/>
	<field name="Full Symbol" value="111AA202621JULFUT"/>
	<field name="System Identifier" value="2"/>
	<field name="System Identifier Description" value="Description"/>
	</items>
	<record name="111111Q1" />
	<items>
	<field name="Electronic Identifier" value="3"/>
	<field name="Symbol" value="111AA2026"/>
	<field name="Full Symbol" value="111AA202621AUGFUT"/>
	<field name="System Identifier" value="3"/>
	<field name="System Identifier Description" value="Description"/>
	</items>
	<record name="111111U1" />
	<items>
	<field name="Electronic Identifier" value="4"/>
	<field name="Symbol" value="111AA2026"/>
	<field name="Full Symbol" value="111AA202621SEPFUT"/>
	<field name="System Identifier" value="4"/>
	<field name="System Identifier Description" value="Description"/>
	</items>
	<record name="111111Z1" />
	<items>
	<field name="Electronic Identifier" value="5"/>
	<field name="Symbol" value="111AA2026"/>
	<field name="Full Symbol" value="111AA202621DECFUT"/>
	<field name="System Identifier" value="5"/>
	<field name="System Identifier Description" value="Description"/>
	</items>
	#!/usr/bin/env sed -nEf

	# Converts record/field XML (see records.xml) into CSV lines of the form:
	#
	# <record name>,<value of the field on the 3rd line after the record>
	#
	# -n flag to disable printing of unhandled lines
	# -E flag to enable extended regular expressions
	# -f flag to tell sed that the contents of this file are commands to execute
	#
	# NOTE(review): this shebang relies on the system splitting "sed -nEf" into
	# separate arguments for env. Linux is believed to pass everything after the
	# interpreter path as one single argument, in which case `env -S` would be
	# needed -- confirm on the target platform.

	#
	# sed uses two internal "variables" called the pattern space and the hold
	# space. Each line read from input is stored inside the pattern space and
	# all [s]ubstitution commands are performed on the pattern space. So this
	# pattern space is effectively rewritten whenever sed reads a new line of
	# input. If we want to remember things between read lines, we need to use
	# the hold space. There are a few commands that allow us to move things
	# between the two spaces, but in what follows, think of the two spaces as
	# two variables with different lifetimes:
	#
	# 1. pattern space: remembers things during the processing of a line
	# 2. hold space: remembers things even as we move across lines
	#

	# For all lines matching "<record" do all of the brace-enclosed actions.
	/<record/ {
	# The matched line is now stored inside the pattern space.

	# Now, replace the read line with just the value of the name attribute,
	# so the pattern space contains just that attribute value. Makes use of
	# the \1 capturing group reference.
	s/<record name="([^"]+)" \/>/\1/

	# Set the value of the hold space to the value of the pattern space, i.e.,
	# the attribute value, because we need to read a few lines before doing
	# additional processing on it.
	h

	# Read the next three lines after the matched line. Each "n" command will
	# set the contents of the pattern space to the contents of the read line.
	# Because of the -n flag, the lines being replaced are not printed.
	n
	n
	n

	# The pattern space now contains the 3rd line after our initial "<record".
	# In records.xml that is the second <field> element, the "Symbol" one; the
	# script relies purely on this line position, not on the field's name.

	# Keep only the attribute value: \1 is again a capturing group reference.
	s/^ *<field name="[^"]+" value="([^"]+)"\/>/\1/

	# Append a newline and then the pattern space to the hold space. This ensures
	# the order of the string in the hold space is: <record>\n<field>
	H

	# The hold space now contains the name of the "record" tag, a newline and
	# the value of the "field" element found on the 3rd line, so we can copy it
	# back from there to the pattern space in order to perform a final
	# substitution and print the result.
	g

	# Replace the newline separating the two values with a comma and [p]rint
	# the result after substitution.
	s/\n/,/p
	}