daformat/utf-spaces-test.js

## utf-spaces-test.js
// Spaces information was found on http://jkorpela.fi/chars/spaces.html
// and https://en.wikipedia.org/wiki/Tab_key#Unicode

{
  // Output toggle: when true, boolean results are rendered as ✅ / ❌ emojis;
  // when false, the raw booleans are displayed
  const useEmojiForBooleans = true;

  // Spaces are to be tested against the following regular expressions.
  // Each regexp's source text is also used as a column header of the table.
  const testRegexps = [
    /\s/, // White-space character class
    /[ \t]/,  // Plain space or horizontal tab only
    /\S/, // Non-white-space character class (complement of \s)
    /[^ \t]/, // Anything except a plain space or a horizontal tab
    // Explicit list: plain space, tab, and the entries flagged `breaking: true` in utfSpaces
    /[ \u200A\u200B\u205F\u1680\u2000\u2001\u2002\u2009\u2008\t]/
  ];

  // We're testing against utf escaping sequences, the rest is informative.
  // Fields of each entry:
  //   name     - human readable name, shown in the table
  //   utf      - the single character the regular expressions are tested against
  //   html     - optional list of HTML character references for the character;
  //              when absent, a decimal reference is derived from `utf`
  //   breaking - optional flag telling whether the space allows a line break
  //   width    - informative description of the rendered width
  const utfSpaces = [
    {
      name: 'Space',
      utf: '\u0020',
      breaking: true,
      width: 'Typically 1/4 em'
    },
    {
      name: 'Thin space',
      utf: '\u2009',
      breaking: true,
      width: '1/5 em, can be 1/6 em'
    },
    {
      name: 'Hair space',
      utf: '\u200A',
      breaking: true,
      width: 'Narrower than a thin space (less than 1/5 em or 1/6em)'
    },
    {
      name: 'Zero width space',
      utf: '\u200B',
      breaking: true,
      width: 'None (invisible character)'
    },
    {
      name: 'Medium mathematical space',
      utf: '\u205F',
      breaking: true,
      width: '4/18 em'
    },
    {
      name: 'Ogham space mark',
      utf: '\u1680',
      breaking: true,
      width: 'Usually represented by a 1em dash'
    },
    {
      name: 'Mongolian vowel separator',
      utf: '\u180E',
      breaking: false,
      width: 'None (invisible character)'
    },
    {
      name: 'EN quad',
      utf: '\u2000',
      breaking: true,
      width: '1 en (1/2 em)'
    },
    {
      name: 'EM quad',
      utf: '\u2001',
      breaking: true,
      width: '1 em'
    },
    {
      name: 'EN space',
      utf: '\u2002',
      breaking: true,
      width: '1 en (1/2 em)'
    },
    {
      name: 'EM space',
      utf: '\u2003',
      breaking: false,
      width: '1 em'
    },
    {
      name: 'Three-per-em space',
      utf: '\u2004',
      breaking: false,
      width: '1/3 em'
    },
    {
      name: 'Four-per-em space',
      utf: '\u2005',
      breaking: false,
      width: '1/4 em'
    },
    {
      name: 'Six-per-em space',
      utf: '\u2006',
      breaking: false,
      width: '1/6 em'
    },
    {
      name: 'Non breaking space',
      utf: '\u00A0',
      html: ['&nbsp;', '&NonBreakingSpace;', '&#160;', '&#x000A0;'],
      breaking: false,
      width: 'Typically 1/4 em, same as a regular space but usually not adjusted with justification'
    },
    {
      name: 'Narrow no-break space',
      utf: '\u202F',
      html: ['&nnbsp;', '&#8239;'],
      breaking: false,
      width: 'Narrower than a non-breaking or breaking space'
    },
    {
      name: 'Figure space',
      utf: '\u2007',
      html: ['&#8199;'],
      breaking: false,
      width: 'The width of digits (tabular space)'
    },
    {
      name: 'Punctuation space',
      utf: '\u2008',
      breaking: true,
      width: 'Width of a period (.)'
    },
    {
      name: 'Word joiner',
      utf: '\u2060',
      // Decimal reference needs the leading '#': 8288 === 0x2060
      html: ['&wj;', '&#8288;'],
      breaking: false,
      width: 'None (invisible character)'
    },
    {
      name: 'Ideographic space',
      utf: '\u3000',
      breaking: false,
      width: 'The width of ideographic (CJK) characters'
    },
    {
      name: 'Zero width no-break space (BOM often interpreted as)',
      utf: '\uFEFF',
      breaking: false,
      width: 'None (invisible character)'
    },
    {
      // No `breaking` flag: the column is left undefined for tabulations
      name: 'Character tabulation',
      utf: '\u0009',
      html: ['&Tab;', '&#9;'],
      width: 'Up to the next tab stop'
    },
    {
      name: 'Line tabulation',
      utf: '\u000B',
      width: 'doesn’t apply (vertical)'
    }
  ];

  // Emoji helper.
  // Returns '✅' for a truthy value and '❌' for a falsy one when
  // `useEmojiForBooleans` is enabled; otherwise returns `bool` unchanged.
  // `undefined` is always returned as-is, whatever the setting.
  // Declared with `const`: a bare assignment would create an implicit global
  // and throws a ReferenceError in strict mode / ES modules.
  const emojify = bool => {
    if (typeof bool === 'undefined' || !useEmojiForBooleans) {
      return bool;
    }
    return bool ? '✅' : '❌';
  };

  // Build one table row per space: identifying columns first, then one column
  // per tested regexp, keyed by the regexp's source text.
  const testTable = utfSpaces.map(space => {
    const codePoint = space.utf.charCodeAt(0);
    // Render the character as a \uXXXX escape (4 upper-case hex digits).
    // This replaces the deprecated escape() + string patching and yields the
    // same text for every entry of utfSpaces.
    const hex = ('000' + codePoint.toString(16).toUpperCase()).slice(-4);

    const testedSpace = {
      name: space.name,
      utf: `\\u${hex}`,
      'utf unescaped': space.utf,
      // Fall back to a decimal character reference when no html list is given
      html: space.html && space.html.join(' ') || `&#${codePoint};`,
      breaking: emojify(space.breaking)
    };

    // `const` keeps the loop variable local instead of leaking a global
    for (const regexp of testRegexps) {
      testedSpace[`${regexp}`] = emojify(regexp.test(space.utf));
    }
    return testedSpace;
  });

  console.table(testTable);
}
	// Spaces information was found on http://jkorpela.fi/chars/spaces.html
	// and https://en.wikipedia.org/wiki/Tab_key#Unicode

	{
	// Output toggle: when true, boolean results are rendered as ✅ / ❌ emojis;
	// when false, the raw booleans are displayed
	const useEmojiForBooleans = true;

	// Spaces are to be tested against the following regular expressions.
	// Each regexp's source text is also used as a column header of the table.
	const testRegexps = [
	/\s/, // White-space character class
	/[ \t]/, // Plain space or horizontal tab only
	/\S/, // Non-white-space character class (complement of \s)
	/[^ \t]/, // Anything except a plain space or a horizontal tab
	// Explicit list: plain space, tab, and the entries flagged `breaking: true` in utfSpaces
	/[ \u200A\u200B\u205F\u1680\u2000\u2001\u2002\u2009\u2008\t]/
	];

	// We're testing against utf escaping sequences, the rest is informative.
	// Fields of each entry:
	//   name     - human readable name, shown in the table
	//   utf      - the single character the regular expressions are tested against
	//   html     - optional list of HTML character references for the character;
	//              when absent, a decimal reference is derived from `utf`
	//   breaking - optional flag telling whether the space allows a line break
	//   width    - informative description of the rendered width
	// The html lists hold the references as text (e.g. '&#160;'), never the
	// decoded whitespace character itself.
	const utfSpaces = [
		{
			name: 'Space',
			utf: '\u0020',
			breaking: true,
			width: 'Typically 1/4 em'
		},
		{
			name: 'Thin space',
			utf: '\u2009',
			breaking: true,
			width: '1/5 em, can be 1/6 em'
		},
		{
			name: 'Hair space',
			utf: '\u200A',
			breaking: true,
			width: 'Narrower than a thin space (less than 1/5 em or 1/6em)'
		},
		{
			name: 'Zero width space',
			utf: '\u200B',
			breaking: true,
			width: 'None (invisible character)'
		},
		{
			name: 'Medium mathematical space',
			utf: '\u205F',
			breaking: true,
			width: '4/18 em'
		},
		{
			name: 'Ogham space mark',
			utf: '\u1680',
			breaking: true,
			width: 'Usually represented by a 1em dash'
		},
		{
			name: 'Mongolian vowel separator',
			utf: '\u180E',
			breaking: false,
			width: 'None (invisible character)'
		},
		{
			name: 'EN quad',
			utf: '\u2000',
			breaking: true,
			width: '1 en (1/2 em)'
		},
		{
			name: 'EM quad',
			utf: '\u2001',
			breaking: true,
			width: '1 em'
		},
		{
			name: 'EN space',
			utf: '\u2002',
			breaking: true,
			width: '1 en (1/2 em)'
		},
		{
			name: 'EM space',
			utf: '\u2003',
			breaking: false,
			width: '1 em'
		},
		{
			name: 'Three-per-em space',
			utf: '\u2004',
			breaking: false,
			width: '1/3 em'
		},
		{
			name: 'Four-per-em space',
			utf: '\u2005',
			breaking: false,
			width: '1/4 em'
		},
		{
			name: 'Six-per-em space',
			utf: '\u2006',
			breaking: false,
			width: '1/6 em'
		},
		{
			name: 'Non breaking space',
			utf: '\u00A0',
			html: ['&nbsp;', '&NonBreakingSpace;', '&#160;', '&#x000A0;'],
			breaking: false,
			width: 'Typically 1/4 em, same as a regular space but usually not adjusted with justification'
		},
		{
			name: 'Narrow no-break space',
			utf: '\u202F',
			html: ['&nnbsp;', '&#8239;'],
			breaking: false,
			width: 'Narrower than a non-breaking or breaking space'
		},
		{
			name: 'Figure space',
			utf: '\u2007',
			html: ['&#8199;'],
			breaking: false,
			width: 'The width of digits (tabular space)'
		},
		{
			name: 'Punctuation space',
			utf: '\u2008',
			breaking: true,
			width: 'Width of a period (.)'
		},
		{
			name: 'Word joiner',
			utf: '\u2060',
			// Decimal reference needs the leading '#': 8288 === 0x2060
			html: ['&wj;', '&#8288;'],
			breaking: false,
			width: 'None (invisible character)'
		},
		{
			name: 'Ideographic space',
			utf: '\u3000',
			breaking: false,
			width: 'The width of ideographic (CJK) characters'
		},
		{
			name: 'Zero width no-break space (BOM often interpreted as)',
			utf: '\uFEFF',
			breaking: false,
			width: 'None (invisible character)'
		},
		{
			// No `breaking` flag: the column is left undefined for tabulations
			name: 'Character tabulation',
			utf: '\u0009',
			html: ['&Tab;', '&#9;'],
			width: 'Up to the next tab stop'
		},
		{
			name: 'Line tabulation',
			utf: '\u000B',
			width: 'doesn’t apply (vertical)'
		}
	];

	// Emoji helper.
	// Returns '✅' for a truthy value and '❌' for a falsy one when
	// `useEmojiForBooleans` is enabled; otherwise returns `bool` unchanged.
	// `undefined` is always returned as-is, whatever the setting.
	// Declared with `const`: a bare assignment would create an implicit global
	// and throws a ReferenceError in strict mode / ES modules.
	const emojify = bool => {
		if (typeof bool === 'undefined' || !useEmojiForBooleans) {
			return bool;
		}
		return bool ? '✅' : '❌';
	};

	// Build one table row per space: identifying columns first, then one column
	// per tested regexp, keyed by the regexp's source text.
	const testTable = utfSpaces.map(space => {
		const codePoint = space.utf.charCodeAt(0);
		// Render the character as a \uXXXX escape (4 upper-case hex digits).
		// This replaces the deprecated escape() + string patching and yields the
		// same text for every entry of utfSpaces.
		const hex = ('000' + codePoint.toString(16).toUpperCase()).slice(-4);

		const testedSpace = {
			name: space.name,
			utf: `\\u${hex}`,
			'utf unescaped': space.utf,
			// Fall back to a decimal character reference when no html list is given
			html: space.html && space.html.join(' ') || `&#${codePoint};`,
			breaking: emojify(space.breaking)
		};

		// `const` keeps the loop variable local instead of leaking a global
		for (const regexp of testRegexps) {
			testedSpace[`${regexp}`] = emojify(regexp.test(space.utf));
		}
		return testedSpace;
	});

	console.table(testTable);
	}