Skip to content

Instantly share code, notes, and snippets.

@jandk
Created May 11, 2014 22:50
Show Gist options
  • Save jandk/b3ffa3fa5f5f616cfee0 to your computer and use it in GitHub Desktop.
Save jandk/b3ffa3fa5f5f616cfee0 to your computer and use it in GitHub Desktop.
{ ----------------- SubRip 1.17 source -------------------
Copyright (C) 2002 Brain
www.subrip.fr.st
submagic@netcourrier.com
See Unit01.pas for licence information.
}
unit Unit10;
interface
uses
Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms,
StdCtrls, Buttons, ExtCtrls, ComCtrls, TntStdCtrls, TntComCtrls,
TntExtCtrls, TntButtons, TntForms, TntSysUtils, TntClasses;
type
TForm10 = class(TTntForm)
BitBtnCorrect: TTntBitBtn;
BitBtn2: TTntBitBtn;
GroupBoxMain: TTntGroupBox;
RadioGroupCorrectIlLanguage: TTntRadioGroup;
ProgressBarIndividual: TTntProgressBar;
ProgressBarAll: TTntProgressBar;
CheckBoxRemoveSpaceBtNumbers: TTntCheckBox;
CheckBoxCorrectTwiceAp: TTntCheckBox;
CheckBoxCorrectPunctuation: TTntCheckBox;
CheckBoxCorrectOrthography: TTntCheckBox;
CheckBoxCorrectIl: TTntCheckBox;
CheckBoxCorrectCapitalLetters: TTntCheckBox;
ComboBoxLangOrtho: TTntComboBox;
CheckBoxCase: TTntCheckBox;
CheckBoxFormatWholeWordsOnly: TTntCheckBox;
procedure BitBtnCorrectClick(Sender: TObject);
procedure CheckBoxCorrectIlClick(Sender: TObject);
procedure FormShow(Sender: TObject);
procedure CheckBoxCorrectCapitalLettersClick(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
function GetUsedCharSet: Byte; //<> DEFAULT_CHARSET
procedure AllSubCorrectIl; //Test pour différentier les I et les l (Anglais, Français ou German)
procedure AllSubCorrectAp; //Test remplacer deux ' par un "
procedure AllSubCorrectPunctuation; //Added by MJQ (subrip@divx.pl)
procedure AllSubCorrectOrthography; //Added by MJQ (subrip@divx.pl)
procedure AllSubCorrectCapitalLetters; //Added by MJQ (subrip@divx.pl)
procedure AllSubRemoveSpaceBtNumbers; //1 993 --> 1993 etc.
procedure WholeWordFormat; //ai4spam@gmail.com
procedure CheckDic(var SDic: TTntStrings);
end;
var
Form10: TForm10;
implementation
uses Unit01, Unit03, UConfig, Unit16;
{$R *.DFM}
//------------------------------------------------------------------------------
function TForm10.GetUsedCharSet: Byte;
var lf: LOGFONT;
begin
if (Form3.FontDialog.Font.Charset=DEFAULT_CHARSET) and //DEFAULT_CHARSET
(GetObject(GetStockObject(SYSTEM_FONT), SizeOf(LOGFONT), @lf) <> 0) then
Result := lf.lfCharSet
else
Result := Form3.Font.Charset;
end;
{WORKS ON W9X ONLY:
var lf: tagNONCLIENTMETRICS;
FillChar(lf, SizeOf(lf), 0);
lf.cbSize:= SizeOf(lf);
if SystemParametersInfo(SPI_GETNONCLIENTMETRICS, lf.cbSize, @lf, 0)
then Result:= TFontCharset(lf.lfCaptionFont.lfCharSet);}
//------------------------------------------------------------------------------
procedure TForm10.BitBtnCorrectClick(Sender: TObject);
var
I: Byte;
begin
I := 0;
if CheckBoxCorrectTwiceAp.Checked then Inc(I);
if CheckBoxCorrectIl.Checked then Inc(I);
if CheckBoxCorrectPunctuation.Checked then Inc(I);
if CheckBoxCorrectOrthography.Checked then Inc(I);
if CheckBoxCorrectCapitalLetters.Checked then Inc(I);
if CheckBoxRemoveSpaceBtNumbers.Checked then Inc(I);
if CheckBoxFormatWholeWordsOnly.Checked then Inc(I);
ProgressBarIndividual.Max := Form1.SubTitles.NbSub;
ProgressBarIndividual.Step := 1;
ProgressBarAll.Max := Form1.SubTitles.NbSub * I;
ProgressBarAll.Step := 1;
if I > 0 then
begin
Screen.Cursor := crHourglass; //set hourglass cursor
try
if CheckBoxCorrectTwiceAp.Checked then AllSubCorrectAp;
if CheckBoxCorrectIl.Checked then AllSubCorrectIl;
if CheckBoxCorrectPunctuation.Checked then AllSubCorrectPunctuation;
if CheckBoxCorrectOrthography.Checked then AllSubCorrectOrthography;
if CheckBoxCorrectCapitalLetters.Checked then AllSubCorrectCapitalLetters;
if CheckBoxRemoveSpaceBtNumbers.Checked then AllSubRemoveSpaceBtNumbers;
if CheckBoxFormatWholeWordsOnly.Checked then WholeWordFormat;
Form3.AddAllSubTitles;
finally
Screen.Cursor := crDefault; //back to default cursor
end;
ProgressBarIndividual.Position := 0;
ProgressBarAll.Position := 0;
end;
end;
//------------------------------------------------------------------------------
procedure TForm10.CheckBoxCorrectIlClick(Sender: TObject);
begin
RadioGroupCorrectIlLanguage.Enabled := CheckBoxCorrectIl.Checked;
end;
//------------------------------------------------------------------------------
procedure TForm10.CheckBoxCorrectCapitalLettersClick(Sender: TObject);
begin
CheckBoxCase.Enabled := CheckBoxCorrectCapitalLetters.Checked;
if CheckBoxCorrectCapitalLetters.Checked then
CheckBoxCorrectPunctuation.Checked := True;
end;
//------------------------------------------------------------------------------
procedure TForm10.AllSubCorrectIl; //I <> l interchange
//# = replacement for note char, widely used (#lf I can't #)
var
St1, St2: Widestring;
WNbSup: Word;
BLine: Byte;
Takel, TakeI: Boolean;
//----------
procedure StCorrectIl;
var W1: Word;
//----------
function TrChar(X: Integer): WideChar;
var
Postn: Word;
i, j: Integer;
begin
Result := #0; Postn := W1;
if X > 0
then
for i := 1 to X do
begin
if Postn < Length(St1) then Inc(Postn) else Exit;
for j := 1 to 3 do //3 possible html tag styles
if St1[Postn] = '<' then
begin
if (Postn <= Length(St1) - 3) and (St1[Postn + 2] = '>') then Postn := Postn + 3;
if (Postn <= Length(St1) - 4) and (St1[Postn + 1] = '/') then Postn := Postn + 4;
end;
end
else
for i := 1 to -X do
begin
if Postn > 1 then Dec(Postn) else Exit;
for j := 1 to 3 do //3 possible html tag styles
if St1[Postn] = '>' then
begin
if (Postn > 3) and (St1[Postn - 2] = '<') then Postn := Postn - 3;
if (Postn > 4) and (St1[Postn - 2] = '/') then Postn := Postn - 4;
end;
end;
Result := St1[Postn];
end;
//----------
procedure StCorrectIl_En; //English
begin
if St1[W1] = 'l' then
begin
//so shity due the styles
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = 't') and (TrChar(+2) = #39) and (TrChar(+3) = 's') then TakeI := True; //_lt's
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = #39) and (TrChar(+2) = 'm') and (TrChar(+3) = ' ') then TakeI := True; //_l'm_
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = #39) and (TrChar(+2) = 'd') and (TrChar(+3) = ' ') then TakeI := True; //_l'd_
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = #39) and (TrChar(+2) = 'v') and (TrChar(+3) = 'e') then TakeI := True; //_l've
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = #39) and (TrChar(+2) = 'l') and (TrChar(+3) = 'l') then TakeI := True; //_l'll
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) in [WideChar('f'), WideChar('n'), WideChar('s'), WideChar('t')]) and (TrChar(+2) = ' ') then TakeI := True; //_lf_, _ln_, _ls_, _lt_
//no English word beginning with "lc", "ld", "lh", "ln", "lr", "ls"
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) in [WideChar('c'), WideChar('d'), WideChar('h'), WideChar('n'), WideChar('r'), WideChar('s')]) then TakeI := True;
end;
end;
//----------
procedure StCorrectIl_Fr; //French
begin
//*unknown*: I think that for french neither "I" alone nor "l" alone can exist so we don't check this. So the only thing we can write is that before an apostrophe there must be an "l".
if ((TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = ' ')) or //but Brain as Frenchman: lonesome I doesn't exist
(TrChar(+1) = #39) then //*unknown*: An I followed by an apostrophe cannot exist. Must be an l.
begin TakeI := False; Takel := True; end;
end;
//----------
procedure StCorrectIl_It; //Italian; comments from *unknown*
begin
//Hint: "i" alone CAN exist (even capital: after a full stop) while l alone can't, never
//l' processing
if (TrChar(+1) = #39) then //Ita always has l before apostrophe (if ' is not used as an accent for a capital letter! [rare, and followed by whitespace])
if (TrChar(+2) in [WideChar('a'), WideChar('A'), WideChar('e'), WideChar('E'), WideChar('i'), WideChar('I'), WideChar('l'), WideChar('o'), WideChar('O'), WideChar('u'), WideChar('U'), WideChar('h'), WideChar('H'), //Ita always has a vowel sound (rarely accented) or h after the apostrophe
WideChar(#224), {all ANSI_CHARSET} {accented a}
WideChar(#232), {e with grave accent}
WideChar(#233), {e with acute accent}
WideChar(#236), {accented i}
WideChar(#242), {accented o}
WideChar(#249)]) {accented u}
then begin Takel := True; TakeI := False; end;
//else: unknown pattern!? Probably an English sentence within an Italian movie. We rely on the general algorithm.
end;
//----------
procedure StCorrectIl_Ge; //German
begin
if St1[W1] = 'l' then
//no German word beginning with "lc", "ld", "lh", "ln", "lr", "ls"
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')])
and (TrChar(+1) in [WideChar('c'), WideChar('d'), WideChar('h'), WideChar('n'), WideChar('r'), WideChar('s')]) then TakeI := True;
end;
//----------
procedure StCorrectIl_Cz; //Czech
begin
if St1[W1] = 'l' then
//no Czech word beginning with "lc", "ld", "lr"
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')])
and (TrChar(+1) in [WideChar('c'), WideChar('d'), WideChar('r')]) then TakeI := True;
end;
//----------
procedure StCorrectIl_Pl; //Polish
begin
if St1[W1] = 'l' then
//no Czech word beginning with "lc", "ld", "lr"
if (TrChar(-1) in [WideChar('.'), WideChar(','), WideChar(' '), WideChar('"'), WideChar('#')])
and (TrChar(+1) in
[WideChar('b'), WideChar('c'), WideChar('d'), WideChar('f'), WideChar('h'), WideChar('j'),
WideChar('k'), WideChar('r'), WideChar('s'), WideChar('t'), WideChar('z'), WideChar('æ'),
WideChar('³'), WideChar('ñ'), WideChar('Ÿ')]) then begin Takel := False; TakeI := True; end;
end;
procedure StCorrectIl_Sp; //Spanish
begin
if St1[W1] = 'l' then
if (TrChar(-1) in [WideChar('.'), WideChar(','), WideChar(' '), WideChar('"'), WideChar('#'), WideChar('¿'), WideChar('¡')])
and (TrChar(+1) in
[WideChar('b'), WideChar('c'), WideChar('d'), WideChar('f'), WideChar('g'), WideChar('h'),
WideChar('j'), WideChar('k'), WideChar('p'), WideChar('q'), WideChar('r'), WideChar('s'),
WideChar('t'), WideChar('v'), WideChar('x'), WideChar('y'), WideChar('z'),
WideChar('á'), WideChar('é'), WideChar('í'), WideChar('ú'), WideChar('ü'),
WideChar('ñ')]) then begin Takel := False; TakeI := True; end;
if St1[W1] = 'I' then
if (TrChar(+1) in [WideChar('á'), WideChar('é'), WideChar('í'), WideChar('ó'), WideChar('ú'), WideChar('ü')])
or (TrChar(-1) in [WideChar('á'), WideChar('é'), WideChar('í'), WideChar('ó'), WideChar('ú'), WideChar('ü'), WideChar('ñ')])
then begin TakeI := False; Takel := True; end;//special letters, means it's inside a word
end;
//----------
begin
for W1 := 1 + 2 to Length(St1) - 2 do
begin
//small L --> big i
if St1[W1] = 'l' then
begin
TakeI := False;
//_lll trio
if (TrChar(-1) in [WideChar(' '), WideChar('#')]) and (TrChar(+1) = 'l') and (TrChar(+2) = 'l') then
begin
//_lllx --> _Illx (Illinois, Illegal etc.) + _Ill-
if TrChar(+3) in [WideChar('-'), WideChar('a'), WideChar('b'), WideChar('e'), WideChar('h'), WideChar('i'), WideChar('n'), WideChar('o'), WideChar('p'), WideChar('s'), WideChar('t'), WideChar('u'), WideChar('y')] then TakeI := True;
//._Ill_ (=bedridden) + ._Ill. (Ill. = Illustriert, Illustration [German])
//other than BoL cases not reflected
if (TrChar(-2) in [WideChar('.'), WideChar('[')]) and (TrChar(+3) in [WideChar(' '), WideChar('.')]) then TakeI := True;
//Godfather III / Godfather III.
if not (TrChar(-2) in [WideChar('.'), WideChar('[')]) and (TrChar(+3) in [WideChar(' '), WideChar('.'), WideChar('!'), WideChar('?'), WideChar(',')])
then begin TakeI := True; St1[W1 + 1] := 'I'; St1[W1 + 2] := 'I' end;
end;
//_lgelit _lglu _lgnition _lgrafpapier _lguana
if (TrChar(-1) in [WideChar(' '), WideChar('['), WideChar('('), WideChar('#')]) and (TrChar(+1) = 'g') and
(TrChar(+2) in [WideChar('e'), WideChar('l'), WideChar('n'), WideChar('r'), WideChar('u')]) then TakeI := True;
//ME LOVES MAKARONl! (MY TO SLYSELl. (!?) [cz; no others affected])
if (TrChar(+1) in [WideChar('.'), WideChar('!'), WideChar('?')]) and
(TrChar(-1) = Tnt_WideUpperCase(TrChar(-1))) and
(TrChar(-1) <> '.') and //»pø.n.l.« at EoL case
(TrChar(-2) = Tnt_WideUpperCase(TrChar(-2)))
then TakeI := True;
//UNlVERSAL, lnternet, lnspektion, lnternational; not changed - lnìný [cz; no others affected]
//contain : _l_ case [no Fr!], XlX, Xl_, _lX
// V.l. Lenin, l.V. Lenin (initials of names)
//safely : -lx OR -_lx; [lx OR [_lx; "Ix OR "_Ix
//dangerous: .lxxx
if ((TrChar(-1) = Tnt_WideUpperCase(TrChar(-1))) and
(TrChar(-1) <> '.') and //»pø.n.l.« case [cz; no others affected]
(TrChar(+1) = Tnt_WideUpperCase(TrChar(+1))))
or
((TrChar(-1) in [WideChar(' '), WideChar('#')]) and (TrChar(+1) = 'n') and (TrChar(+2) <> 'ì'))
then TakeI := True;
//last sign at uniline OR space follows
if (TrChar(+1) = ' ') and
(TrChar(-1) = Tnt_WideUpperCase(TrChar(-1)))
then TakeI := True;
//._l | -l | -_l; contain: ._ll_ (italian: Il brutto, il nero)
if ((TrChar(-2) <> '.') and (TrChar(-1) = '.')) or
((TrChar(-2) = ' ') and (TrChar(-1) = '-')) or
((TrChar(-2) in [WideChar('.'), WideChar('-')]) and (TrChar(-1) = ' ')) and
(TrChar(+1) <> 'i') then TakeI := True;
//_ll_ to _Il_ ...this is made only as prep for next
if (TrChar(-1) in [WideChar(' '), WideChar('#')]) and (TrChar(+1) = 'l') and
(TrChar(+2) in [WideChar(' '), WideChar('.'), WideChar(','), WideChar('?'), WideChar('!'), WideChar(':')]) then TakeI := True;
//_Il_ to _II_ (-2 <> . or [) Godfather II & Ramses II
if (TrChar(-3) <> '.') and (TrChar(-3) <> '[') and (TrChar(-3) <> #0) and
(TrChar(-2) = ' ') and (TrChar(-1) = 'I') and
(TrChar(+1) in [WideChar(' '), WideChar('.'), WideChar(','), WideChar('?'), WideChar('!'), WideChar(':')]) then TakeI := True;
//"hey C.W. load the weapons" and "hey... load the weapons"
// 432101 432101
if (TrChar(-4) = '.') and (TrChar(-2) = '.') and (TrChar(-1) = ' ') and
(TrChar(+1) in [WideChar('a'), WideChar('e'), WideChar('i'), WideChar('o'), WideChar('u'), WideChar('y')])
then TakeI := False;
//_Ital...
if (TrChar(-1) in [WideChar(' '), WideChar('#')]) and (TrChar(+1) = 't') and (TrChar(+2) = 'a')
then TakeI := True;
//only for international abnormalities
case RadioGroupCorrectIlLanguage.ItemIndex of
0: StCorrectIl_En;
1: StCorrectIl_Fr;
2: StCorrectIl_Ge;
3: StCorrectIl_Cz;
4: StCorrectIl_It;
5: StCorrectIl_Pl;
6: StCorrectIl_Sp;
end;
if TakeI then
begin
St1[W1] := 'I';
//'-' for I-I-I...
if (W1 > 4) and (St1[W1 - 2] = 'l') and (St1[W1 - 1] = '-') then
if St1[W1 - 3] = ' ' then St1[W1 - 2] := 'I'
else
if (St1[W1 - 4] = 'l') and (St1[W1 - 3] = '-') then
begin St1[W1 - 2] := 'I'; St1[W1 - 4] := 'I'; end;
end;
end;
//big i --> small L
if St1[W1] = 'I' then
begin
Takel := False;
//_III to _Ill
if (TrChar(-1) in [WideChar(' '), WideChar('#')]) and (TrChar(+1) = 'I') and (TrChar(+2) = 'I') then
begin
//_IIIx --> _Illx (Illinois, Illegal etc.) + _Ill-
if (TrChar(+3) in [WideChar('-'), WideChar('a'), WideChar('b'), WideChar('e'), WideChar('h'), WideChar('i'), WideChar('n'), WideChar('o'), WideChar('p'), WideChar('s'), WideChar('t'), WideChar('u'), WideChar('y')])
or
//._Ill_ (=bedridden) + ._Ill. (Ill. = Illustriert, Illustration [German])
//other than BoL cases not reflected
((TrChar(-2) in [WideChar('.'), WideChar('[')]) and (TrChar(+3) in [WideChar(' '), WideChar('.')]))
then begin St1[W1 + 1] := 'l'; St1[W1 + 2] := 'l' end;
//Godfather III / Godfather III. *PASS*
end;
//All, English, German
case TrChar(-1) of
'a'..'z', 'ä', 'ö', 'ü': Takel := True;
else
if not ((TrChar(+1) in
//All, English, German; 'w' - Iwao (Japan name), 'v' - Ivan
[WideChar('A')..WideChar('Z'), WideChar('Ä'), WideChar('Ö'), WideChar('Ü'), WideChar(' '), WideChar('b')..WideChar('d'), WideChar('g'), WideChar('h'), WideChar('k')..WideChar('p'),
WideChar('r')..WideChar('t'), WideChar('v'), WideChar('w'), WideChar('z'), WideChar(#39), WideChar('.'), WideChar(','), WideChar('!'), WideChar('?')]) or //',' prevents PRCl, PRCl, PRCICKY
//+East Europe - Czech, Slovak, Polish
((Form1.Process.AppCharset = EASTEUROPE_CHARSET) and
(TrChar(+1) in
[WideChar('Á'), WideChar('É'), WideChar('Ì'), WideChar('Í'), WideChar('Ó'), WideChar('Ú'), WideChar('Ù'), WideChar('Ý'), WideChar('È'), WideChar('Ï'), WideChar('Ò'), WideChar('Ø'),
WideChar('Š'), WideChar(''), WideChar('Ž'), WideChar('Ô'), WideChar('Å'), WideChar('¼'), WideChar('£'), WideChar(''), WideChar('¯'), WideChar('Ó'), WideChar('¥'), WideChar('Ê'),
WideChar('Œ'), WideChar('Æ'), WideChar('Ñ')])) or
//+Scandinavian
((Form1.Process.AppCharset = ANSI_CHARSET) and
(Ord(TrChar(+1)) in
[198, //AE (alt + 0198) , ANSI WIDE = 262
216, // O with stroke (alt + 0216) , ANSI WIDE = 344
197]) //A with ring above (alt + 0197), ANSI WIDE = 313
) or
//+Italian: _Ieri_
((TrChar(-1) in [WideChar(' '), WideChar('#')]) and (TrChar(+1) = 'e') and
(TrChar(+2) = 'r') and (RadioGroupCorrectIlLanguage.ItemIndex = 4)
)) then Takel := True;
end; //case
//+East Europe - Czech, Slovak, Polish
if (Form1.Process.AppCharset = EASTEUROPE_CHARSET) and
(TrChar(-1) in
[WideChar('á'), WideChar('è'), WideChar('ï'), WideChar('é'), WideChar('ì'), WideChar('í'), WideChar('ò'), WideChar('ó'), WideChar('ø'), WideChar('š'), WideChar(''),
WideChar('ú'), WideChar('ù'), WideChar('ý'), WideChar('ž'), WideChar('ô'), WideChar('å'), WideChar('¾'), WideChar('Ÿ'), WideChar('¿'), WideChar('ó'), WideChar('³'),
WideChar('¹'), WideChar('ê'), WideChar('œ'), WideChar('æ'), WideChar('ñ')]) then Takel := True;
//+Scandinavian
if (Form1.Process.AppCharset = ANSI_CHARSET) and
(Ord(TrChar(-1)) in
[230, //ae (alt + 0230) , ANSI WIDE = 263
248, // o with stroke (alt + 0248) , ANSI WIDE = 345
229]) //a with ring above (alt + 0229), ANSI WIDE = 314
then Takel := True;
//_AIways AIden BIb BIouznit CIaire ÈIovìk PIný SIožit UItra ZIost
if (TrChar(-2) in [WideChar(' '), WideChar('"'), WideChar('-'), WideChar('c'), WideChar('#')]) and //'c' for Mac/Mc (McCIoy)
(TrChar(-1) in [WideChar('A')..WideChar('Z'), WideChar('È'), WideChar('Š'), WideChar('Ž'), WideChar(''), WideChar('Œ')]) and
(TrChar(+1) in [WideChar('a')..WideChar('z'), WideChar('á'), WideChar('è'), WideChar('é'), WideChar('ì'), WideChar('í'), WideChar('ó'), WideChar('ò'), WideChar('š'), WideChar('ù'), WideChar('ý')])
then Takel := True;
//Ihned leave, but Ihostejný --> lhostejný [cz; no others affected]
if (TrChar(+1) = 'h') and (TrChar(+2) <> 'n') then Takel := True;
//_AI_ to _Al_ - name (Artificial Intelligence (AI) not awaited :-)
if (TrChar(-2) in [WideChar(' '), WideChar('"'), WideChar('#')]) and
(TrChar(-1) = 'A') and
(TrChar(+1) in [WideChar(' '), WideChar('"'), WideChar('.'), WideChar(','), WideChar('!'), WideChar('?'), WideChar(#39)])
then Takel := True;
//._II_ to ._Il_ (Il nero on BoL italian)
if (TrChar(-3) in [WideChar('.'), WideChar('['), WideChar('-')]) and (TrChar(-2) = ' ') and
(TrChar(-1) = 'I') and (TrChar(+1) = ' ') then Takel := True;
//(repd. from l-->I procedure too) "EI gringo" to "El g." (BoL Spain)
if (TrChar(-1) = 'E') and (TrChar(+1) = ' ') then Takel := True;
//'II_ --> 'll_
if (TrChar(-1) = #39) and (TrChar(+1) = 'I') and (TrChar(+2) = ' ')
then begin Takel := True; St1[W1 + 1] := 'l' end;
//_Iodic, Iolanthe/Iolite, Ion, Iowa, *Ioya* (but loyalty and more), Iota
//vs Ioad Iocation Iook Iost Ioud Iove
//Ioch Iogický/Iogo Ioket Iomcovák Iopata Iovit
//. Io... (Italian)
if not (TrChar(-2) in [WideChar('.'), WideChar('!'), WideChar('?')]) and
(TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = 'o') and not
(TrChar(+2) in [WideChar('d'), WideChar('l'), WideChar('n'), WideChar('w'), {'y',} WideChar('t'), WideChar(' '), WideChar('.'), WideChar(','), WideChar('!'), WideChar('?')])
then Takel := True;
//Iodní etc. vs Iodate Ioderma Iodic Iodoethanol [cz only, won't affect others]
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = 'o') and (TrChar(+2) = 'd') and
not (TrChar(+3) in [WideChar(#0), WideChar('a'), WideChar('e'), WideChar('i'), WideChar('o')]) then Takel := True;
//Iondýnský [cz only, won't affect others]
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = 'o') and (TrChar(+2) = 'n') and
(TrChar(+3) = 'd') then Takel := True;
//_Iong
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = 'o') and
(TrChar(+2) = 'n') and (TrChar(+3) = 'g') then Takel := True;
//_Ioni_ --> _loni_ [cz only, don't affect others]
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = 'o') and (TrChar(+2) = 'n') and
(TrChar(+3) = 'i') and (TrChar(+4) in [WideChar(' '), WideChar('"'), WideChar('.'), WideChar(','), WideChar('!'), WideChar('?'), WideChar(':')])
then Takel := True;
//Iot_ etc. vs Iota
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = 'o') and
(TrChar(+2) = 't') and (TrChar(+3) <> 'a') then Takel := True;
//Iow_ etc. vs Iowa
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = 'o') and
(TrChar(+2) = 'w') and (TrChar(+3) <> 'a') then Takel := True;
//_AII_ etc.
if (TrChar(-2) in [WideChar(' '), WideChar('"'), WideChar('-'), WideChar('#')]) and
(TrChar(-1) in [WideChar('A'), WideChar('E'), WideChar('U')]) and (TrChar(+1) = 'I') and
(TrChar(+2) in [WideChar(' '), WideChar(#39), WideChar('-'), WideChar('a')..WideChar('z')])
then begin Takel := True; St1[W1 + 1] := 'l' end;
//Eng only, no others affected: _If_, _If-, Iffy, Ifle, Ifor
if (TrChar(-1) in [WideChar(' '), WideChar('"'), WideChar('#')]) and (TrChar(+1) = 'f') and
(TrChar(+2) in [WideChar(' '), WideChar('-'), WideChar('f'), WideChar('l'), WideChar('o')]) then Takel := False;
//"hey C.W. Ioad the weapons" and "hey... Ioad the weapons"
// 432101 432101
if (TrChar(-4) = '.') and (TrChar(-2) = '.') and (TrChar(-1) = ' ') and
(TrChar(+1) in [WideChar('a'), WideChar('e'), WideChar('i'), WideChar('o'), WideChar('u'), WideChar('y')])
then Takel := True;
//only for international abnormalities
case RadioGroupCorrectIlLanguage.ItemIndex of
//0: StCorrectIl_En;
1: StCorrectIl_Fr;
//2: StCorrectIl_Ge;
//3: StCorrectIl_Cz;
4: StCorrectIl_It;
end;
if Takel then
begin
St1[W1] := 'l';
//'-' for 'Uh, l-ladies and gentleman...' / 'just as you l-left it'
if (W1 > 4) and (St1[W1 - 2] = 'I') and (St1[W1 - 1] = '-') then
if St1[W1 - 3] = ' ' then St1[W1 - 2] := 'l'
else
if (St1[W1 - 4] = 'I') and (St1[W1 - 3] = '-') then
begin St1[W1 - 2] := 'l'; St1[W1 - 4] := 'l'; end;
end;
end;
end;
end;
//----------
procedure StripEndTags;
begin
St2:= Tnt_WideStringReplace(St2, '</b>', '', [rfReplaceAll, rfIgnoreCase]);
St2:= Tnt_WideStringReplace(St2, '</i>', '', [rfReplaceAll, rfIgnoreCase]);
St2:= Tnt_WideStringReplace(St2, '</u>', '', [rfReplaceAll, rfIgnoreCase]);
end;
//----------
begin {AllSubCorrectIl}
TakeI := False; Takel := False;
with Form1.Subtitles do
for WNbSup := 1 to NbSub do begin
for BLine := 1 to Sub[WNbSup].NbLine do
begin
//Pour pouvoir faire St1-2 et St1+2 au debut et à la fin de St1
St1 := ' ' + Sub[WNbSup].Line[BLine] + ' ';
if BLine > 1 then
begin
//Pour voir la ligne précédente (Ex: pour savoir si elle se termine par un point)
St2 := Sub[WNbSup].Line[BLine - 1];
StripEndTags; //kick out possible end tags
end
else
if WNbSup = 1 then St2 := '.'
else
begin
St2 := Sub[WNbSup - 1].Line[Sub[WNbSup - 1].NbLine];
StripEndTags; //kick out possible end tags
end;
//Copie le dernier caractere de la ligne precedente dans l'espace ajouté à la ligne courante
St1[1] := St2[Length(St2)];
StCorrectIl; //correct one uniline
//On remplace par la ligne traitée (Sans les espaces précédement ajoutés)
Sub[WNbSup].Line[BLine] := Copy(St1, 1 + 2, Length(St1) - 4);
end;
ProgressBarAll.StepIt;
ProgressBarIndividual.StepIt;
end;
end; {AllSubCorrectIl}
//----- '' --> " and "" --> " -------------------------------------------------
procedure TForm10.AllSubCorrectAp;
var
WNbSup: Word;
BLine: Byte;
begin
with Form1.Subtitles do
for WNbSup := 1 to NbSub do begin
for BLine := 1 to Sub[WNbSup].NbLine do
begin
Sub[WNbSup].Line[BLine] := Tnt_WideStringReplace(Sub[WNbSup].Line[BLine], '`', #39, [rfReplaceAll]);
Sub[WNbSup].Line[BLine] := Tnt_WideStringReplace(Sub[WNbSup].Line[BLine], '´', #39, [rfReplaceAll]);
Sub[WNbSup].Line[BLine] := Tnt_WideStringReplace(Sub[WNbSup].Line[BLine], #39#39, '"', [rfReplaceAll]);
Sub[WNbSup].Line[BLine] := Tnt_WideStringReplace(Sub[WNbSup].Line[BLine], '""', '"', [rfReplaceAll]);
end;
ProgressBarAll.StepIt;
ProgressBarIndividual.StepIt;
end;
end;
//------------------------------------------------------------------------------
procedure TForm10.CheckDic(var SDic: TTntStrings);
var
I: Integer;
begin
//odd count 2 even
if Odd(SDic.Count) then //ToDo report
SDic.Delete(SDic.Count - 1);
//prevent endless strings & loops (e.g. '..' to '...', or 'a' to 'a')
I := 0;
while SDic.Count > I do
begin
if Pos(SDic[I], SDic[I + 1]) > 0 then
begin
SDic.Delete(I + 1); SDic.Delete(I); //delete problematic items; order!
end;
Inc(I, 2);
end;
end;
//-Added by MJQ(subrip@divx.pl)-------------------------------------------------
procedure TForm10.AllSubCorrectPunctuation;
var
WNbSup: Word;
BLine, QoutCnt: Byte;
W1: Word;
S1: Widestring;
I: Integer;
SDic: TTntStrings;
const
Chars1 = '.,:;%$!?';
begin
SDic := TTntStringList.Create;
try
SDic.LoadFromFile(AppzDir + 'Dict\punct.dic');
CheckDic(SDic);
//add ' ' to ' '
SDic.Add(' '); SDic.Add(' ');
with Form1.Subtitles do
for WNbSup := 1 to NbSub do
begin
for BLine := 1 to Sub[WNbSup].NbLine do
begin
S1 := Sub[WNbSup].Line[BLine];
//"--" at the beginning of dialogues (Spy Kids 2, Stolen Summer, Sweet Home Alabama)
I := 1;
while (S1[I] = '<') and (S1[I + 1] in [WideChar('b'), WideChar('i'), WideChar('u')]) and //styles
(S1[I + 2] = '>') do Inc(I, 3);
if (S1[I] = '-') and (S1[I + 1] = '-') and (S1[I + 2] <> '-') then
Delete(S1, I, 1);
//removed from punct.dic: |, -| -> |. -| and |,-| -> |. -|
//it is dangerous in such a cases: |These are my operatives, -|
//ToDo SW way
//dict
for I := 0 to (SDic.Count - 1) div 2 do
while Pos(SDic[2 * I], S1) > 0 do
S1 := Tnt_WideStringReplace(S1, SDic[2 * I], SDic[2 * I + 1], [rfReplaceAll]);
//".." to "..."
W1 := Pos('..', S1);
if W1 > 0 then
while Length(S1) > W1 do
begin
if ((W1 = 1) or (S1[W1 - 1] <> '.')) and
(S1[W1] = '.') and (S1[W1 + 1] = '.') and
((Length(S1) = W1 + 1) or (S1[W1 + 2] <> '.')) then
Insert('.', S1, W1);
Inc(W1);
end;
//"..."
repeat
W1 := Pos('...', S1);
if W1 > 0 then
//Example "It is ..." -> "It is..."
if (W1 = Length(S1) - 2) and (S1[W1 - 1] = ' ') then
S1 := Copy(S1, 1, Length(S1) - 4) + '<><>'
else
//Example "... it is" -> "...it is"
if (W1 = 1) and (S1[W1 + 3] = ' ')
then S1 := '<><>' + Copy(S1, 5, Length(S1) - 3)
else
//Example "It...is" -> "It... is"
if (W1 > 1) and (W1 < Length(S1) - 2) and
not (S1[W1 + 3] in [WideChar(' '), WideChar(']'), WideChar(','), WideChar('<')]) and //'<' styles
not (S1[W1 - 2] in [WideChar('?'), WideChar('!')])
then S1 := Copy(S1, 1, W1 - 1) + '<><> ' + Copy(S1, W1 + 3, Length(S1) - (W1 + 2))
else
//Example "Who?... there..." -> "Who? ...there..."
if (W1 > 1) and (W1 < Length(S1) - 2) and
(S1[W1 - 1] in [WideChar('?'), WideChar('!')]) then
S1 := Copy(S1, 1, W1 - 1) + ' <><>' + Copy(S1, W1 + 4, Length(S1) - (W1 + 3))
else
S1 := Copy(S1, 1, W1 - 1) + '<><>' + Copy(S1, W1 + 3, Length(S1) - (W1 + 2));
until W1 = 0;
S1 := Tnt_WideStringReplace(S1, '?<><> ', '? <><>', [rfReplaceAll]);
S1 := Tnt_WideStringReplace(S1, '<><> ?', '<><>?', [rfReplaceAll]);
S1 := Tnt_WideStringReplace(S1, '<><> !', '<><>!', [rfReplaceAll]);
S1 := Tnt_WideStringReplace(S1, '!<><> ', '! <><>', [rfReplaceAll]);
S1 := Tnt_WideStringReplace(S1, '"<><> ', '"<><>', [rfReplaceAll]);
S1 := Tnt_WideStringReplace(S1, '<><> "', '<><>"', [rfReplaceAll]);
S1 := Tnt_WideStringReplace(S1, '- <><> ', '<><>', [rfReplaceAll]);
S1 := Tnt_WideStringReplace(S1, '- <><>', '<><>', [rfReplaceAll]);
S1 := Tnt_WideStringReplace(S1, '-<><> ', '<><>', [rfReplaceAll]);
S1 := Tnt_WideStringReplace(S1, '-<><>', '<><>', [rfReplaceAll]);
S1 := Tnt_WideStringReplace(S1, ' <><> ', '<><> ', [rfReplaceAll]);
S1 := Tnt_WideStringReplace(S1, ',<><>', '<><>', [rfReplaceAll]);
//|"_| -> |"| or |_"| -> |"| - sometimes in safe cases
QoutCnt := 0;
for I := 1 to Length(S1) do
if S1[I] = '"' then Inc(QoutCnt);
if QoutCnt > 0 then
if not Odd(QoutCnt) then
begin
for I := 1 to Length(S1) do
if S1[I] = '"' then
begin
if Odd(QoutCnt) then
begin
if (I > 1) and (S1[I - 1] = ' ') then Delete(S1, I - 1, 1);
end
else
if (I < Length(S1)) and (S1[I + 1] = ' ') then Delete(S1, I + 1, 1);
Dec(QoutCnt);
end;
end
else
begin
//aftercheck 1: |"_| at position 1 (if only 1x " appear as beginning)
if Pos('" ', S1) = 1 then Delete(S1, 2, 1);
//aftercheck 2: |_"| at last position (if only 1x " appear as end)
if Pos(' "', S1) = Length(S1) - 1 then Delete(S1, Length(S1) - 1, 1);
end;
//"-"
repeat
W1 := Pos('-', S1);
if W1 > 0 then
begin
//Example "-It is" -> "- It is"
if (W1 = 1) and (S1[W1 + 1] <> ' ')
then S1 := '[][] ' + Copy(S1, 2, Length(S1) - 1)
else
//Example "Hi. -It is." -> "Hi. - It is." (works for styles: "<i>-It is")
if ((S1[W1 - 2] in [WideChar('.'), WideChar('!'), WideChar('?'), WideChar('>'){no styles}]) or (S1[W1 - 1] = '>'{styles})) and (S1[W1 + 1] <> ' ')
then S1 := Copy(S1, 1, W1 - 1) + '[][] ' + Copy(S1, W1 + 1, Length(S1) - (W1))
else
//Example "hi - fi" -> "hi-fi"
//NOT GOOD IDEA:
//1) Let's kiss. But remember - no tongues.
//2) He is a Dunpeal - a half human, half vampire.
//3) 3 against 1 - how unfair.
{if (S1[W1 - 1] = ' ') and (S1[W1 + 1] = ' ') and (S1[W1 - 2] >= 'a') and
(S1[W1 + 2] >= 'a') and (S1[W1 - 2] <= 'z') and (S1[W1 + 2] <= 'z')
then S1 := Copy(S1, 1, W1 - 2) + '[][]' + Copy(S1, W1 + 2, Length(S1) - (W1 + 1))
else}
S1 := Copy(S1, 1, W1 - 1) + '[][]' + Copy(S1, W1 + 1, Length(S1) - (W1));
end;
until W1 = 0;
//Spaces after "," "." ":" ";" "$" "%"
if (Pos('www.', S1) = 0) and (Pos('.com ', S1) = 0) and
(Pos('.pl ', S1) = 0) and (Pos('.cz ', S1) = 0) and
(Pos('@', S1) = 0) and (Pos('tp://', S1) = 0) then
for I := 1 to 6 do
begin
repeat
W1 := Pos(Chars1[I], S1);
if W1 > 0 then
if (W1 <> Length(S1)) and not (S1[W1 + 1] in [WideChar(' '), WideChar('"'), WideChar(#39),
WideChar('?'), WideChar('!'), WideChar(','), WideChar(']'), WideChar('}'), WideChar(')'), WideChar('<')]) //'<' styles
and ((S1[W1 - 1] < '0') or (S1[W1 - 1] > '9')) and ((S1[W1 + 1] < '0') or (S1[W1 + 1] > '9'))
and (S1[W1 + 2] <> '.')
then S1 := Copy(S1, 1, W1 - 1) + '()() ' + Copy(S1, W1 + 1, Length(S1) - (W1))
else
S1 := Copy(S1, 1, W1 - 1) + '()()' + Copy(S1, W1 + 1, Length(S1) - W1);
until W1 = 0;
S1 := Tnt_WideStringReplace(S1, '()()', Chars1[I], [rfReplaceAll]);
end;
//Spaces after "!" "?"
for I := 7 to 8 do
begin
repeat
W1 := Pos(Chars1[I], S1);
if W1 > 0 then
if (W1 <> Length(S1)) and (S1[W1 + 1] <> ' ') and not (S1[W1 + 1] in [WideChar('!'), WideChar('?'),
WideChar('"'), WideChar(']'), WideChar('}'), WideChar(')'), WideChar('<')]) //'<' styles
then S1 := Copy(S1, 1, W1 - 1) + '()() ' + Copy(S1, W1 + 1, Length(S1) - (W1))
else
S1 := Copy(S1, 1, W1 - 1) + '()()' + Copy(S1, W1 + 1, Length(S1) - (W1));
until W1 = 0;
S1 := Tnt_WideStringReplace(S1, '()()', Chars1[I], [rfReplaceAll]);
end;
S1 := Tnt_WideStringReplace(S1, '[][]', '-', [rfReplaceAll]);
//S1 := Tnt_WideStringReplace(S1, '{}{}', ',', [rfReplaceAll]);
S1 := Tnt_WideStringReplace(S1, '<><>', '...', [rfReplaceAll]);
Sub[WNbSup].Line[BLine] := S1;
end;
ProgressBarAll.StepIt;
ProgressBarIndividual.StepIt;
end;
finally
SDic.Free;
end;
end;
//-Polish orthography (sie --> siê etc.)---Added by MJQ (subrip@divx.pl)--------
procedure TForm10.AllSubCorrectOrthography;
var
WNbSup: Word;
BLine: Byte;
SDic: TTntStrings;
I: Integer;
begin
SDic := TTntStringList.Create;
try
SDic.LoadFromFile(AppzDir + 'Dict\' + ComboBoxLangOrtho.Text + '.dic');
CheckDic(SDic);
with Form1.Subtitles do
for WNbSup := 1 to NbSub do
begin
for BLine := 1 to Sub[WNbSup].NbLine do
for I := 0 to (SDic.Count - 1) div 2 do
Sub[WNbSup].Line[BLine] := Tnt_WideStringReplace(Sub[WNbSup].Line[BLine],
SDic[2 * I], SDic[2 * I + 1], [rfReplaceAll]-[rfIgnoreCase]);
ProgressBarAll.StepIt;
ProgressBarIndividual.StepIt;
end;
finally
SDic.Free;
end;
end;
//-Added by MJQ(subrip@divx.pl)-------------------------------------------------
procedure TForm10.AllSubCorrectCapitalLetters;
var
WNbSup: Word;
BLine: Byte;
S1, Sstrip: WideString;
W1, I, J: Word;
EndOfPrevSentence: Boolean;
const
Chars2 = '-.!?:';
begin
EndOfPrevSentence := True; //to set the 1st letter of 1st subtitle to UpperCase
with Form1.Subtitles do
for WNbSup := 1 to NbSub do
begin
for BLine := 1 to Sub[WNbSup].NbLine do
begin
S1 := Sub[WNbSup].Line[BLine];
if CheckBoxCase.Checked then S1 := Tnt_WideLowerCase(S1);
//Change "..." to "<><>"
S1 := Tnt_WideStringReplace(S1, '...', '<><>', [rfReplaceAll]);
//Capital letters after ". " "? " "! " "- "
for I := 1 to Length(Chars2) do
begin
repeat
W1 := Pos(Chars2[I] + ' ', S1);
if W1 > 1 then //take Pos from 2 and higher ("- " elsewhere)
if W1 <> Length(S1) then
if ((I = 2) {'.'} and (S1[W1 - 1] in [WideChar('0') .. WideChar('9')])) or //prevents: 28. srpna -> 28. Srpna
((I = 1) {'-'} and not (S1[W1 - 2] in [WideChar('.'), WideChar('!'), WideChar('?'), WideChar('¿'), WideChar('¡')])) then //UpperCase only after .!?¿¡
S1 := Copy(S1, 1, W1 - 1) + '()()' + Copy(S1, W1 + 1, Length(S1) - W1)
else
S1 := Copy(S1, 1, W1 - 1) + '()() ' + Tnt_WideUpperCase(S1[W1 + 2]) + Copy(S1, W1 + 3, Length(S1) - (W1 + 2))
else
S1 := Copy(S1, 1, W1 - 1) + '()()' + Copy(S1, W1 + 1, Length(S1) - W1);
until W1 <= 1;
S1 := Tnt_WideStringReplace(S1, '()()', Chars2[I], [rfReplaceAll]);
end;
//Change "<><>" back to "..."
S1 := Tnt_WideStringReplace(S1, '<><>', '...', [rfReplaceAll]);
//change the 1st letter case according to previous subtitle line
if EndOfPrevSentence and (S1[1] <> '.') then //...bla
begin
J := 1;
while (S1[J] = '<') and (S1[J + 1] in [WideChar('b'), WideChar('i'), WideChar('u')]) and //styles
(S1[J + 2] = '>') do Inc(J, 3);
if S1[J] <> '.' then //<i>...bla
while J <= Length(S1) do
begin
//Alex: (battle cries) --> (Battle cries)
//It's not expected behaviour. It'll be much better to stay words in "()" intact.
if not (S1[J] in [WideChar('['), {'(',} WideChar('"'), WideChar(#39), WideChar('#'), WideChar('-'), WideChar(' ')]) then
begin
S1[J] := Tnt_WideUpperCase(S1[J])[1];
Break;
end;
Inc(J);
end;
end;
//prepare for next line
Sstrip := Trim(Form3.StripStyle(S1)); //styles
J := Length(Sstrip);
if J = 0 then //in case of empty string (only styles)
EndOfPrevSentence := False
else
begin
while (Sstrip[J] in [WideChar(#39), WideChar('"'), WideChar(' ')]) and (J > 1) do Dec(J);
EndOfPrevSentence := Sstrip[J] in [WideChar('.'), WideChar('!'), WideChar('?'), WideChar(':'), WideChar(']'), WideChar(')')];
if (Sstrip[J] = '.') and (Sstrip[J - 1] = '.') and
(Sstrip[J - 2] = '.') then EndOfPrevSentence := False;
end;
Sub[WNbSup].Line[Bline] := S1;
end;
ProgressBarAll.StepIt;
ProgressBarIndividual.StepIt;
end;
end;
//------------------------------------------------------------------------------
procedure TForm10.AllSubRemoveSpaceBtNumbers;
var
WNbSup: Word;
BLine: Byte;
I: Integer;
begin
with Form1.Subtitles do
for WNbSup := 1 to NbSub do
begin
for BLine := 1 to Sub[WNbSup].NbLine do
for I := 1 to Length(Sub[WNbSup].Line[BLine]) - 2 do
if (Length(Sub[WNbSup].Line[BLine]) >= I + 2) {Delete!} and
(Sub[WNbSup].Line[BLine][I] in [WideChar('0')..WideChar('9'), WideChar('/')]) and //was: '1', '4', '7'
(Sub[WNbSup].Line[BLine][I + 1] = ' ') then
if (Sub[WNbSup].Line[BLine][I + 2] in [WideChar('0')..WideChar('9'), WideChar(','), WideChar('.'), WideChar(':'), WideChar('/')]) or
//5 -1-5
((Sub[WNbSup].Line[BLine][I + 2] = WideChar('-')) and
(Length(Sub[WNbSup].Line[BLine]) >= I + 3) {Delete!} and
(Sub[WNbSup].Line[BLine][I + 3] in [WideChar('0')..WideChar('9')])) then
Delete(Sub[WNbSup].Line[BLine], I + 1, 1);
ProgressBarAll.StepIt;
ProgressBarIndividual.StepIt;
end;
end;
//------------------------------------------------------------------------------
procedure TForm10.WholeWordFormat;
var
WNbSup: Word;
BLine: Byte;
rep,I,b,e,p,q,total,marked: Integer;
line,newline: WideString;
bold,italic,underline: array of integer;
bb,bi,bu:integer;
m:boolean;
begin
with Form1.Subtitles do
for WNbSup := 1 to NbSub do
begin
for BLine := 1 to Sub[WNbSup].NbLine do
begin
line:=Sub[WNbSup].Line[BLine];
SetLength(bold,0);
SetLength(italic,0);
SetLength(underline,0);
newline:='';
bb:=0;
bi:=0;
bu:=0;
I:=1;
while I<=Length(line) do
begin;//strip the tags and extract the attributes
m:=false;
if line[I]='<' then
begin
if I<=Length(line)-2 then
if (line[I+2]='>') then
begin
if (line[I+1]='i')then bi:=1;
if (line[I+1]='b')then bb:=1;
if (line[I+1]='u')then bu:=1;
m:=true;
I:=I+2;
end;
if I<=Length(line)-3 then
if (line[I+1]='/')and(line[I+3]='>') then
begin
if (line[I+2]='i')then bi:=0;
if (line[I+2]='b')then bb:=0;
if (line[I+2]='u')then bu:=0;
m:=true;
I:=I+3;
end;
end;
if not m then
begin//not inside a tag
newline:=newline+line[I];
p:=Length(newline);
SetLength(bold,p+1);
SetLength(italic,p+1);
SetLength(underline,p+1);
italic[p]:=bi;
bold[p]:=bb;
underline[p]:=bu;
end;
I:=I+1;
end;
I:=2;
while I<=Length(newLine) do
begin//enforce whole words only
if (italic[I]>0)and(newline[i]<>' ') then
begin
total:=0;
marked:=1;
p:=I-1;
b:=I;
while (newline[p]<>' ')and(p>0) do
begin
if italic[p]>0 then marked:=marked+1;
p:=p-1;
b:=b-1;
end;
p:=I+1;
e:=I;
while (newline[p]<>' ')and(p<=Length(newline)) do
begin
if italic[p]>0 then marked:=marked+1;
p:=p+1;
e:=e+1;
end;
if marked<(p-I)/2 then rep:=0 else rep:=1000;
for q:=b to e do italic[q]:=rep;
if rep=1000 then
begin
italic[b]:=e-b+1;
italic[e]:=-e+b-1;
if e=b then italic[e]:=1;
end;
end;
if (underline[I]>0)and(newline[i]<>' ') then
begin
total:=0;
marked:=1;
p:=I-1;
b:=I;
while (newline[p]<>' ')and(p>0) do
begin
if underline[p]>0 then marked:=marked+1;
p:=p-1;
b:=b-1;
end;
p:=I+1;
e:=I;
while (newline[p]<>' ')and(p<=Length(newline)) do
begin
if underline[p]>0 then marked:=marked+1;
p:=p+1;
e:=e+1;
end;
if marked<(p-I)/2 then rep:=0 else rep:=1000;
for q:=b to e do underline[q]:=rep;
if rep=1000 then
begin
underline[b]:=e-b+1;
underline[e]:=-e+b-1;
if e=b then underline[e]:=1;
end;
end;
if (bold[I]>0)and(newline[i]<>' ') then
begin
total:=0;
marked:=1;
p:=I-1;
b:=I;
while (newline[p]<>' ')and(p>0) do
begin
if bold[p]>0 then marked:=marked+1;
p:=p-1;
b:=b-1;
end;
p:=I+1;
e:=I;
while (newline[p]<>' ')and(p<=Length(newline)) do
begin
if bold[p]>0 then marked:=marked+1;
p:=p+1;
e:=e+1;
end;
if marked<(p-I)/2 then rep:=0 else rep:=1000;
for q:=b to e do bold[q]:=rep;
if rep=1000 then
begin
bold[b]:=e-b+1;
bold[e]:=-e+b-1;
if e=b then bold[e]:=1;
end;
end;
I:=I+1;
end;
I:=1;//move formatting at beginning/ending of words
while I<=Length(newLine) do
begin
if newLine[I]=' ' then
begin
if I>1 then
begin
if (italic[I]<>0)and(italic[i-1]=0)then italic[I]:=0;
if (underline[I]<>0)and(underline[i-1]=0)then underline[I]:=0;
if (bold[I]<>0)and(bold[i-1]=0)then bold[I]:=0;
end;
if I<Length(newLine) then
begin
if (italic[I]<>0)and(italic[i+1]=0)then italic[I]:=0;
if (underline[I]<>0)and(underline[i+1]=0)then underline[I]:=0;
if (bold[I]<>0)and(bold[i+1]=0)then bold[I]:=0;
end;
end;
I:=I+1;
end;
I:=1;//put together words with the same formatting
while I<=Length(newLine) do
begin
if (italic[I]>0)and(italic[I]<>1000)then
begin//beginning of a word
b:=I;
p:=I;
while p<Length(newline) do
begin
while (italic[p])>0 do p:=p+1;//find the end
if p<Length(newline)-2 then//not the last word
begin
if italic[p+1]=1 then
begin
italic[b]:=italic[b]+italic[p+2]+1;//change length at beginning
italic[p+1+italic[p+2]]:=-italic[b];//change length at end
italic[p]:=1000;
italic[p+1]:=1000;
italic[p+2]:=1000;
end;
end;
p:=p+1;
if italic[p]=0 then p:=Length(newline);//exit
end;
end;
if (underline[I]>0)and(underline[I]<>1000)then
begin//beginning of a word
b:=I;
p:=I;
while p<Length(newline) do
begin
while (underline[p])>0 do p:=p+1;//find the end
if p<Length(newline)-2 then//not the last word
begin
if underline[p+1]=1 then
begin
underline[b]:=underline[b]+underline[p+2]+1;//change length at beginning
underline[p+1+underline[p+2]]:=-underline[b];//change length at end
underline[p]:=1000;
underline[p+1]:=1000;
underline[p+2]:=1000;
end;
end;
p:=p+1;
if underline[p]=0 then p:=Length(newline);//exit
end;
end;
if (bold[I]>0)and(bold[I]<>1000)then
begin//beginning of a word
b:=I;
p:=I;
while p<Length(newline) do
begin
while (bold[p])>0 do p:=p+1;//find the end
if p<Length(newline)-2 then//not the last word
begin
if bold[p+1]=1 then
begin
bold[b]:=bold[b]+bold[p+2]+1;//change length at beginning
bold[p+1+bold[p+2]]:=-bold[b];//change length at end
bold[p]:=1000;
bold[p+1]:=1000;
bold[p+2]:=1000;
end;
end;
p:=p+1;
if bold[p]=0 then p:=Length(newline);//exit
end;
end;
I:=I+1;
end;
I:=1;//build back the formatted text
line:='';
bi:=0;
bb:=0;
bu:=0;
while I<=Length(newLine) do
begin
//open tags
if (bold[I]>0)and (bb=0) then
begin
bb:=bold[I];
if (underline[I]>0)and (bu=0) then
begin
bu:=underline[I];
if (italic[I]>0)and (bi=0) then
begin//all three
bi:=italic[I];
if (bb<=bu)and(bu<=bi) then line:=line+'<i><u><b>';//IUB
if (bb<=bi)and(bi<bu) then line:=line+'<u><i><b>';//UIB
if (bi<bu)and(bu<bb) then line:=line+'<b><u><i>';//BUI
if (bi<bb)and(bb<=bu) then line:=line+'<u><b><i>';//UBI
if (bu<bb)and(bb<=bi) then line:=line+'<i><b><u>';//IBU
if (bu<=bi)and(bi<bb) then line:=line+'<b><i><u>';//BIU
end
else
begin//just BU
if (bb<bu)then line:=line+'<u><b>'//UB
else line:=line+'<b><u>';//BU
end;
end
else
begin
if (italic[I]>0)and (bi=0) then
begin//just BI
bi:=italic[I];
if (bb<bi)then line:=line+'<i><b>'//IB
else line:=line+'<b><i>';//BI
end
else line:=line+'<b>';//B
end;
end
else if (underline[I]>0)and (bu=0) then
begin
bu:=underline[I];
if (italic[I]>0)and (bi=0) then
begin//just UI
bi:=italic[I];
if (bu<bi)then line:=line+'<i><u>'//IU
else line:=line+'<u><i>';//UI
end
else line:=line+'<u>';//U
end
else if (italic[I]>0)and (bi=0) then
begin
bi:=italic[I];
line:=line+'<i>';
end;
line:=line+newline[I];//put the text
//close tags
if (italic[I]<0) then
begin
bi:=-italic[I];
if (underline[I]<0) then
begin
bu:=-underline[I];
if (bold[I]<0) then
begin//all three
bb:=-bold[I];
if (bb<=bu)and(bu<=bi) then line:=line+'</b></u></i>';//BUI
if (bb<=bi)and(bi<bu) then line:=line+'</b></i></u>';//BIU
if (bi<bu)and(bu<bb) then line:=line+'</i></u></b>';//IUB
if (bi<bb)and(bb<=bu) then line:=line+'</i></b></u>';//IBU
if (bu<bb)and(bb<=bi) then line:=line+'</u></b></i>';//UBI
if (bu<=bi)and(bi<bb) then line:=line+'</u></i></b>';//UIB
bb:=0;
end
else
begin//just IU
if (bi<=bu)then line:=line+'</i></u>'//IU
else line:=line+'</u></i>';//UI
end;
bu:=0;
end
else
begin
if (bold[I]<0) then
begin//just IB
bb:=-bold[I];
if (bi<=bb)then line:=line+'</i></b>'//IB
else line:=line+'</b></i>';//BI
bb:=0;
end
else line:=line+'</i>';//I
end;
bi:=0;
end
else if (underline[I]<0) then
begin
bu:=-underline[I];
if (bold[I]<0) then
begin//just UB
bb:=-bold[I];
if (bu<=bb)then line:=line+'</u></b>'//UB
else line:=line+'</b></u>';//BU
bb:=0;
end
else line:=line+'</u>';//U
bu:=0;
end
else if (bold[I]<0) then
begin
bb:=-bold[I];
line:=line+'</b>';
bb:=0;
end;
I:=I+1;
end;
line := Trim(line);
line := Tnt_WideStringReplace(line, '<i> ', ' <i>', [rfReplaceAll]);
line := Tnt_WideStringReplace(line, '<u> ', ' <u>', [rfReplaceAll]);
line := Tnt_WideStringReplace(line, '<b> ', ' <b>', [rfReplaceAll]);
line := Tnt_WideStringReplace(line, ' </i>', '</i> ', [rfReplaceAll]);
line := Tnt_WideStringReplace(line, ' </u>', '</u> ', [rfReplaceAll]);
line := Tnt_WideStringReplace(line, ' </b>', '</b> ', [rfReplaceAll]);
line := Tnt_WideStringReplace(line, '</i> <i>', ' ', [rfReplaceAll]);
line := Tnt_WideStringReplace(line, '</u> <u>', ' ', [rfReplaceAll]);
line := Tnt_WideStringReplace(line, '</b> <b>', ' ', [rfReplaceAll]);
Sub[WNbSup].Line[BLine]:=line;
end;//for all lines
ProgressBarAll.StepIt;
ProgressBarIndividual.StepIt;
end;
SetLength(bold,0);
SetLength(italic,0);
SetLength(underline,0);
end;
//------------------------------------------------------------------------------
procedure TForm10.FormShow(Sender: TObject); //check existing .dic each FormShow
var
Rec: TSearchRecW;
StArr: TTntStringList;
S: Widestring;
begin
S := ComboBoxLangOrtho.Items[ComboBoxLangOrtho.ItemIndex];
StArr := TTntStringList.Create;
try
if (WideFindFirst(AppzDir + 'Dict\*.dic', faAnyfile, Rec) = 0) and
(Rec.Name <> 'punct.dic') then
StArr.Add(Copy(Rec.Name, 1, Length(Rec.Name) - 4));
while WideFindNext(Rec) = 0 do
if Rec.Name <> 'punct.dic' then
StArr.Add(Copy(Rec.Name, 1, Length(Rec.Name) - 4));
WideFindClose(Rec);
StArr.Sort;
ComboBoxLangOrtho.Items.Assign(StArr);
if ComboBoxLangOrtho.Items.IndexOf(S) >= 0 then
ComboBoxLangOrtho.ItemIndex := ComboBoxLangOrtho.Items.IndexOf(S)
else
ComboBoxLangOrtho.ItemIndex := 0;
finally
StArr.Free;
if ComboBoxLangOrtho.Items.Count = 0 then
begin
CheckBoxCorrectOrthography.Checked := False;
CheckBoxCorrectOrthography.Enabled := False;
end
else
CheckBoxCorrectOrthography.Enabled := True;
end;
end;
//------------------------------------------------------------------------------
end.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment