adil192/split_file.dart

## split_file.dart
import 'dart:io';
import 'dart:math';

/// A section of the input text: a `title` and its text `content`.
typedef Section = ({String title, String content});

/// Converts [section] to speech and writes it to `output/<i>-<title>.wav`.
///
/// The section's text is split into chunks, and for each chunk a command like
/// `tts --text "hello" --model_name "tts_models/en/jenny/jenny" --out_path hello.wav --use_cuda true`
/// is run. The resulting chunk files are then merged with `sox` and deleted;
/// a single chunk file is simply renamed to the target file.
///
/// [i] prefixes the generated file names, and every non-word character of the
/// section title is replaced with `_` to build them. The unmodified section
/// content is also saved to `raw/<i>-<title>.txt` unless that file already
/// exists.
///
/// Returns early if the final `.wav` file already exists, and skips any chunk
/// whose `.wav` file already exists, so an interrupted run can be resumed.
///
/// Throws a [ProcessException] if `tts` or `sox` exits with a non-zero exit
/// code. Chunk files that were already produced are kept in that case.
Future<void> tts(int i, Section section) async {
  const ttsExecutable = '/home/ahann/.local/bin/tts';
  const soxExecutable = '/usr/bin/sox';

  final escapedContent = section.content.trim().replaceAll('\n', ' ');
  final escapedTitle = section.title.trim().replaceAll(RegExp(r'\W'), '_');

  final chapterTextFile = File('raw/$i-$escapedTitle.txt');
  if (!await chapterTextFile.exists()) {
    // Create `raw/` on first use instead of failing with a file system error.
    await chapterTextFile.parent.create(recursive: true);
    await chapterTextFile.writeAsString(section.content);
  }

  final targetFileNoExt = 'output/$i-$escapedTitle';
  final targetFile = File('$targetFileNoExt.wav');
  if (await targetFile.exists()) {
    print('  - Already exists, skipping');
    return;
  }

  // My GPU can't handle too much text at once, so split it into chunks
  final contentChunks = _splitIntoChunks(escapedContent);
  if (contentChunks.isEmpty) {
    // Without any chunk there is nothing to rename or merge.
    print('  - No text to synthesise, skipping');
    return;
  }
  await targetFile.parent.create(recursive: true);

  final chunkFiles = <File>[];
  for (final (chunkIndex, chunkContent) in contentChunks.indexed) {
    final chunkPath = '$targetFileNoExt-chunk-$chunkIndex.wav';
    final chunkFile = File(chunkPath);
    chunkFiles.add(chunkFile);
    print('  - $chunkPath');

    if (await chunkFile.exists()) {
      print('    - Already exists, skipping');
      continue;
    }

    final arguments = [
      '--text',
      chunkContent,
      '--model_name',
      // 'tts_models/en/ljspeech/neural_hmm',
      'tts_models/en/jenny/jenny',
      '--out_path',
      chunkPath,
      '--use_cuda',
      'true',
    ];
    final result = await Process.run(ttsExecutable, arguments);
    print(
        '${section.title} (Chunk $chunkIndex / ${contentChunks.length - 1}) ${result.stdout} ${result.stderr}\n\n');

    if (result.exitCode != 0) {
      // Remove whatever was written so a rerun does not skip this chunk.
      if (await chunkFile.exists()) await chunkFile.delete();
      throw ProcessException(
        ttsExecutable,
        arguments,
        'tts failed for $chunkPath',
        result.exitCode,
      );
    }
  }

  // Merge the chunks
  if (chunkFiles.length == 1) {
    await chunkFiles.first.rename(targetFile.path);
    return;
  }

  final mergeArguments = [
    ...chunkFiles.map((f) => f.path),
    targetFile.path,
  ];
  final mergeResult = await Process.run(soxExecutable, mergeArguments);
  print(
      '${section.title} (Merge) ${mergeResult.stdout} ${mergeResult.stderr}\n\n');

  // Only delete the chunks once the merged file was produced successfully.
  if (mergeResult.exitCode != 0) {
    throw ProcessException(
      soxExecutable,
      mergeArguments,
      'sox failed to merge chunks into ${targetFile.path}',
      mergeResult.exitCode,
    );
  }
  await Future.wait(chunkFiles.map((f) => f.delete()));
}

/// Splits [text] into chunks that can be synthesised separately.
///
/// Each chunk ends just after the first full stop found at least [minLength]
/// characters past the chunk's start, or at the end of [text] when there is no
/// such full stop. Chunks are trimmed and blank chunks are dropped.
List<String> _splitIntoChunks(String text, {int minLength = 2000}) {
  final chunks = <String>[];
  var start = 0;
  while (start < text.length) {
    final searchFrom = min(start + minLength, text.length);
    final stop = text.indexOf('.', searchFrom);
    // Keep the full stop with the sentence it terminates.
    final end = stop == -1 ? text.length : stop + 1;

    final chunk = text.substring(start, end).trim();
    if (chunk.isNotEmpty) chunks.add(chunk);
    start = end;
  }
  return chunks;
}

/// Splits the input file into sections.
///
/// The first entry of [lines] is the title of the first section. Every later
/// line that starts with a heading number such as `1.2 ` (digits, a dot,
/// digits, then a space) begins a new section and becomes its title. All other
/// lines are appended, each followed by a newline, to the content of the
/// current section.
///
/// Yields nothing when [lines] is empty.
Iterable<Section> splitSections(List<String> lines) sync* {
  // An empty input has no title line; `lines.first` would throw a StateError.
  if (lines.isEmpty) return;

  // Built once rather than for every line.
  final headingPattern = RegExp(r'^\d+\.\d+ ');
  final buffer = StringBuffer();
  var title = lines.first;

  for (final line in lines.skip(1)) {
    if (line.startsWith(headingPattern)) {
      // yield the previous section
      yield (title: title, content: buffer.toString());
      // start a new section
      buffer.clear();
      title = line;
      continue;
    }
    buffer.writeln(line);
  }

  // yield the last section
  yield (title: title, content: buffer.toString());
}

/// Reads `ux.txt`, splits it into sections and converts each one to speech.
///
/// Sections are processed one after another; each section's position in the
/// file is passed to [tts] as its index.
Future<void> main() async {
  final fullFile = File('ux.txt');
  final lines = await fullFile.readAsLines();
  print('Read ${lines.length} lines');

  for (final (i, section) in splitSections(lines).indexed) {
    print('Processing ${section.title} (${section.content.length} chars)');
    await tts(i, section);
  }
}
	import 'dart:io';
	import 'dart:math';

	/// A section of the input text: a `title` and its text `content`.
	typedef Section = ({String title, String content});

	/// Converts [section] to speech and writes it to `output/<i>-<title>.wav`.
	///
	/// The section's text is split into chunks, and for each chunk a command like
	/// `tts --text "hello" --model_name "tts_models/en/jenny/jenny" --out_path hello.wav --use_cuda true`
	/// is run. The resulting chunk files are then merged with `sox` and deleted;
	/// a single chunk file is simply renamed to the target file.
	///
	/// [i] prefixes the generated file names, and every non-word character of the
	/// section title is replaced with `_` to build them. The unmodified section
	/// content is also saved to `raw/<i>-<title>.txt` unless that file already
	/// exists.
	///
	/// Returns early if the final `.wav` file already exists, and skips any chunk
	/// whose `.wav` file already exists, so an interrupted run can be resumed.
	///
	/// Throws a [ProcessException] if `tts` or `sox` exits with a non-zero exit
	/// code. Chunk files that were already produced are kept in that case.
	Future<void> tts(int i, Section section) async {
	  const ttsExecutable = '/home/ahann/.local/bin/tts';
	  const soxExecutable = '/usr/bin/sox';

	  final escapedContent = section.content.trim().replaceAll('\n', ' ');
	  final escapedTitle = section.title.trim().replaceAll(RegExp(r'\W'), '_');

	  final chapterTextFile = File('raw/$i-$escapedTitle.txt');
	  if (!await chapterTextFile.exists()) {
	    // Create `raw/` on first use instead of failing with a file system error.
	    await chapterTextFile.parent.create(recursive: true);
	    await chapterTextFile.writeAsString(section.content);
	  }

	  final targetFileNoExt = 'output/$i-$escapedTitle';
	  final targetFile = File('$targetFileNoExt.wav');
	  if (await targetFile.exists()) {
	    print(' - Already exists, skipping');
	    return;
	  }

	  // My GPU can't handle too much text at once, so split it into chunks
	  final contentChunks = _splitIntoChunks(escapedContent);
	  if (contentChunks.isEmpty) {
	    // Without any chunk there is nothing to rename or merge.
	    print(' - No text to synthesise, skipping');
	    return;
	  }
	  await targetFile.parent.create(recursive: true);

	  final chunkFiles = <File>[];
	  for (final (chunkIndex, chunkContent) in contentChunks.indexed) {
	    final chunkPath = '$targetFileNoExt-chunk-$chunkIndex.wav';
	    final chunkFile = File(chunkPath);
	    chunkFiles.add(chunkFile);
	    print(' - $chunkPath');

	    if (await chunkFile.exists()) {
	      print(' - Already exists, skipping');
	      continue;
	    }

	    final arguments = [
	      '--text',
	      chunkContent,
	      '--model_name',
	      // 'tts_models/en/ljspeech/neural_hmm',
	      'tts_models/en/jenny/jenny',
	      '--out_path',
	      chunkPath,
	      '--use_cuda',
	      'true',
	    ];
	    final result = await Process.run(ttsExecutable, arguments);
	    print(
	        '${section.title} (Chunk $chunkIndex / ${contentChunks.length - 1}) ${result.stdout} ${result.stderr}\n\n');

	    if (result.exitCode != 0) {
	      // Remove whatever was written so a rerun does not skip this chunk.
	      if (await chunkFile.exists()) await chunkFile.delete();
	      throw ProcessException(
	        ttsExecutable,
	        arguments,
	        'tts failed for $chunkPath',
	        result.exitCode,
	      );
	    }
	  }

	  // Merge the chunks
	  if (chunkFiles.length == 1) {
	    await chunkFiles.first.rename(targetFile.path);
	    return;
	  }

	  final mergeArguments = [
	    ...chunkFiles.map((f) => f.path),
	    targetFile.path,
	  ];
	  final mergeResult = await Process.run(soxExecutable, mergeArguments);
	  print(
	      '${section.title} (Merge) ${mergeResult.stdout} ${mergeResult.stderr}\n\n');

	  // Only delete the chunks once the merged file was produced successfully.
	  if (mergeResult.exitCode != 0) {
	    throw ProcessException(
	      soxExecutable,
	      mergeArguments,
	      'sox failed to merge chunks into ${targetFile.path}',
	      mergeResult.exitCode,
	    );
	  }
	  await Future.wait(chunkFiles.map((f) => f.delete()));
	}

	/// Splits [text] into chunks that can be synthesised separately.
	///
	/// Each chunk ends just after the first full stop found at least [minLength]
	/// characters past the chunk's start, or at the end of [text] when there is no
	/// such full stop. Chunks are trimmed and blank chunks are dropped.
	List<String> _splitIntoChunks(String text, {int minLength = 2000}) {
	  final chunks = <String>[];
	  var start = 0;
	  while (start < text.length) {
	    final searchFrom = min(start + minLength, text.length);
	    final stop = text.indexOf('.', searchFrom);
	    // Keep the full stop with the sentence it terminates.
	    final end = stop == -1 ? text.length : stop + 1;

	    final chunk = text.substring(start, end).trim();
	    if (chunk.isNotEmpty) chunks.add(chunk);
	    start = end;
	  }
	  return chunks;
	}

	/// Splits the input file into sections.
	///
	/// The first entry of [lines] is the title of the first section. Every later
	/// line that starts with a heading number such as `1.2 ` (digits, a dot,
	/// digits, then a space) begins a new section and becomes its title. All other
	/// lines are appended, each followed by a newline, to the content of the
	/// current section.
	///
	/// Yields nothing when [lines] is empty.
	Iterable<Section> splitSections(List<String> lines) sync* {
	  // An empty input has no title line; `lines.first` would throw a StateError.
	  if (lines.isEmpty) return;

	  // Built once rather than for every line.
	  final headingPattern = RegExp(r'^\d+\.\d+ ');
	  final buffer = StringBuffer();
	  var title = lines.first;

	  for (final line in lines.skip(1)) {
	    if (line.startsWith(headingPattern)) {
	      // yield the previous section
	      yield (title: title, content: buffer.toString());
	      // start a new section
	      buffer.clear();
	      title = line;
	      continue;
	    }
	    buffer.writeln(line);
	  }

	  // yield the last section
	  yield (title: title, content: buffer.toString());
	}

	/// Reads `ux.txt`, splits it into sections and converts each one to speech.
	///
	/// Sections are processed one after another; each section's position in the
	/// file is passed to [tts] as its index.
	Future<void> main() async {
	  final fullFile = File('ux.txt');
	  final lines = await fullFile.readAsLines();
	  print('Read ${lines.length} lines');

	  for (final (i, section) in splitSections(lines).indexed) {
	    print('Processing ${section.title} (${section.content.length} chars)');
	    await tts(i, section);
	  }
	}