Skip to content

Instantly share code, notes, and snippets.

@nykula
Last active June 22, 2018 11:14
Show Gist options
  • Save nykula/1428c2ebd026eb14ed782ac9509a215d to your computer and use it in GitHub Desktop.
Save nykula/1428c2ebd026eb14ed782ac9509a215d to your computer and use it in GitHub Desktop.
#!/bin/sh
perl -i -pe 's|^\s*([^<].*)$|<p>$1</p>|g' "$1"
perl -i -pe 's/(\s|&nbsp;)+(–|—|-)\s+/&nbsp;&mdash; /g' "$1"
perl -i -pe 's|href=|href@=|g' "$1"
perl -i -pe 's|id=|id@=|g' "$1"
perl -i -pe 's|src=|src@=|g' "$1"
perl -i -pe 's|\s*\w+="[^"]*"||g' "$1"
perl -i -pe 's|@=|=|g' "$1"
perl -i -pe 's|<p><br\s*/?>|<p>|g' "$1"
perl -i -pe 's|</?span>||g' "$1"
perl -i -pe 's|</?img>||g' "$1"
perl -i -pe 's|&nbsp;</p>|</p>|g' "$1"
perl -i -pe 's|[\.:\s]*</h|</h|g' "$1"
sed -i '' -e '/<p><\/p>/d' "$1"
perl -i -pe 's|(&nbsp;\|\s)*<br\s*/?>-\s*|</p>\n<p>|g' "$1"
perl -i -pe 's|&nbsp;(</\w+>)$|$1|g' "$1"
perl -i -pe 's|^(<\w+>)&nbsp;|$1|g' "$1"
perl -i -pe 's|снує вірогідність|снує ймовірність|g' "$1"
perl -i -pe 's|<br\s*/?>[^) ]+\)\s*|</p>\n<p>- |g' "$1"
perl -i -pe 's|<br\s*/?>(\d{1,3})[^\d][)\.]?\s*|</p>\n<p>$1. |g' "$1"
perl -i -pe 's|(\d) -(\d)|$1-$2|g' "$1"
perl -i -pe 's|<p>\s*\d{1,3}[^\d][)\.]?(?:\s\|&nbsp;)*(.*)</p>|<li>$1</li>|g' "$1"
perl -i -pe 's|<p>\s*(?:-\|•)(?:\s\|&nbsp;)*(.*)</p>|<li>$1</li>|g' "$1"
perl -i -pe 's|<p>[^) ]+\)\s*(.*)</p>|<li>$1</li>|g' "$1"
perl -i -0pe 's|</(\w*[^il])>\n<li>|</$1>\n<ul>\n<li>|g' "$1"
perl -i -0pe 's|</li>\n<([^\/l])|</li>\n</ul>\n<$1|g' "$1"
perl -i -0pe 's|</li>\s*$|</li>\n</ul>|g' "$1"
perl -i -pe 's|(\d)&nbsp;&mdash;\s*(\d)|$1-$2|g' "$1"
perl -i -pe 's|<(li\|p)>\s*([^<][^<\.:]{0,70})\s*(\.\|:)\s*(.{140,})|<$1><strong>$2$3</strong> $4|g' "$1"
perl -i -pe 's|<(li\|p)>\s*([^<][^<&]{0,70}&nbsp;&mdash;)\s*(.{140,})|<$1><strong>$2</strong> $3|g' "$1"
perl -i -pe 's|<p>\s*([^<].{0,70})</p>|<h2>$1</h2>|g' "$1"
perl -i -0pe 's|</h2>\s*<h2>(.*?)</h2>|. $1</h2>|g' "$1"
perl -i -pe 's|\.\.+|\.|g' "$1"
perl -i -pe 's| :\. |. |g' "$1"
perl -i -pe 's|:\.&nbsp;&mdash; |: |g' "$1"
perl -i -pe 's|<p>(.{0,70})(:\|&nbsp;\|\s)*<br\s*/?>|<h2>$1</h2>\n<p>|g' "$1"
perl -i -pe 's/(&nbsp;|\s)*у\.(&nbsp;|\s)*е\.\s*/&nbsp;у.&nbsp;е. /g' "$1"
perl -i -pe 's/\s*:\s*/: /g' "$1"
perl -i -pe 's|<p>(.*?)<br\s*/?>|<p>$1</p>\n<p>|g' "$1"
perl -i -pe 's/^\s*//g' "$1"
perl -i -0pe 's|<tr>\s*<td>(.*?)</td>|<tr>\n<th>$1</th>|g' "$1"
perl -i -pe 's/(\d)(р|г)/$1 $2/g' "$1"
perl -i -pe 's/ »/»/g' "$1"
perl -i -pe 's/<p>\s+/<p>/g' "$1"
perl -i -pe 's/Являється/Є/g' "$1"
perl -i -pe 's/\s("|;|,|\))\s/$1 /g' "$1"
perl -i -pe 's/\s*\( / (/g' "$1"
perl -i -pe 's/(\s|\()(г|м)\.([^\s;,])/$1$2. $3/g' "$1"
perl -i -pe 's|(<p>(?:<em>)?)(<img [^>]*>)([^<])|<p>$2</p>\n$1$3|g' "$1"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment