Skip to content

Instantly share code, notes, and snippets.

@kamawanu
Created December 17, 2016 10:55
Show Gist options
  • Save kamawanu/c9ec6a8bcabd54fa542654704b937571 to your computer and use it in GitHub Desktop.
Save kamawanu/c9ec6a8bcabd54fa542654704b937571 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# OCR a single image with tesseract (language: jpn) and print the
# recognized text on stdout.
#
# Usage: <this script> IMAGE
#
# Tesseract parameters are read from a config file stored next to this
# script and named after it:
#   <script path>.config.<tesseract version>   if that file exists, else
#   <script path>.config
#
# Exit status:
#   9  no argument was given, or IMAGE does not exist / is empty
#   1  the work directory could not be created, or tesseract produced
#      no text
#   otherwise the status of the final cat

[ $# -eq 0 ] && exit 9
IMAGE=$1
[ -s "$IMAGE" ] || exit 9

SELF=$(realpath "$0")

# Private, unpredictable work directory (instead of /tmp/gett$$).
TEMP=$(mktemp -d "${TMPDIR:-/tmp}/gett.XXXXXX") || exit 1

cleanup() {
  /bin/rm -rf -- "${TEMP:?}"
}
# Clean up on every exit path. The signal traps call exit so that the
# EXIT trap runs and the script stops instead of carrying on without its
# work directory. SIGKILL (9) cannot be trapped, so it is not listed.
trap cleanup EXIT
trap 'exit 131' QUIT
trap 'exit 134' ABRT
trap 'exit 143' TERM

# Version string as reported by tesseract itself, e.g. "3.04.01".
TV=$(tesseract --version 2>&1 | awk '$1 == "tesseract" {print $2}')

# Prefer a config file written for exactly this tesseract version.
CONFIG=config
if [ -f "$SELF.$CONFIG.$TV" ]; then
  CONFIG=$CONFIG.$TV
fi

# tesseract appends ".txt" to the output base it is given, so the same
# base must be used to locate the result afterwards.
OUT_BASE=$TEMP/title
tesseract -l jpn "$IMAGE" "$OUT_BASE" "$SELF.$CONFIG" > /dev/null 2>&1

[ -s "$OUT_BASE.txt" ] || exit 1
cat "$OUT_BASE.txt"
editor_image_xpos 590
editor_image_ypos 10
editor_image_menuheight 50
editor_image_word_bb_color 7
editor_image_blob_bb_color 4
editor_image_text_color 2
editor_dbwin_xpos 50
editor_dbwin_ypos 500
editor_dbwin_height 24
editor_dbwin_width 80
editor_word_xpos 60
editor_word_ypos 510
editor_word_height 240
editor_word_width 655
textord_debug_tabfind 0
textord_debug_bugs 0
textord_testregion_left -1
textord_testregion_top -1
textord_testregion_right 2147483647
textord_testregion_bottom 2147483647
textord_tabfind_show_partitions 0
devanagari_split_debuglevel 0
edges_max_children_per_outline 40
edges_max_children_layers 5
edges_children_per_grandchild 10
edges_children_count_limit 45
edges_min_nonhole 12
edges_patharea_ratio 40
textord_fp_chop_error 2
textord_tabfind_show_images 0
textord_skewsmooth_offset 4
textord_skewsmooth_offset2 1
textord_test_x -2147483647
textord_test_y -2147483647
textord_min_blobs_in_row 4
textord_spline_minblobs 8
textord_spline_medianwin 6
textord_max_blob_overlaps 4
textord_min_xheight 10
textord_lms_line_trials 12
oldbl_holed_losscount 10
pitsync_linear_version 6
pitsync_fake_depth 1
textord_tabfind_show_strokewidths 0
textord_dotmatrix_gap 3
textord_debug_block 0
textord_pitch_range 2
textord_words_veto_power 5
classify_num_cp_levels 3
equationdetect_save_bi_image 0
equationdetect_save_spt_image 0
equationdetect_save_seed_image 0
equationdetect_save_merged_image 0
textord_debug_images 0
textord_debug_printable 0
textord_space_size_is_variable 0
textord_tabfind_show_initial_partitions 0
textord_tabfind_show_reject_blobs 0
textord_tabfind_show_columns 0
textord_tabfind_show_blocks 0
textord_tabfind_find_tables 1
textord_tabfind_show_color_fit 0
devanagari_split_debugimage 0
textord_show_fixed_cuts 0
edges_use_new_outline_complexity 0
edges_debug 0
edges_children_fix 0
gapmap_debug 0
gapmap_use_ends 0
gapmap_no_isolated_quanta 0
textord_heavy_nr 0
textord_show_initial_rows 0
textord_show_parallel_rows 0
textord_show_expanded_rows 0
textord_show_final_rows 0
textord_show_final_blobs 0
textord_test_landscape 0
textord_parallel_baselines 1
textord_straight_baselines 0
textord_old_baselines 1
textord_old_xheight 0
textord_fix_xheight_bug 1
textord_fix_makerow_bug 1
textord_debug_xheights 0
textord_biased_skewcalc 1
textord_interpolating_skew 1
textord_new_initial_xheight 1
textord_debug_blob 0
textord_really_old_xheight 0
textord_oldbl_debug 0
textord_debug_baselines 0
textord_oldbl_paradef 1
textord_oldbl_split_splines 1
textord_oldbl_merge_parts 1
oldbl_corrfix 1
oldbl_xhfix 0
textord_ocropus_mode 0
textord_tabfind_only_strokewidths 0
textord_tabfind_vertical_text 1
textord_tabfind_force_vertical_text 0
textord_tabfind_vertical_horizontal_mix 1
textord_tabfind_show_initialtabs 0
textord_tabfind_show_finaltabs 0
textord_dump_table_images 0
textord_show_tables 0
textord_tablefind_show_mark 0
textord_tablefind_show_stats 0
textord_tablefind_recognize_tables 0
textord_all_prop 0
textord_debug_pitch_test 0
textord_disable_pitch_test 0
textord_fast_pitch_test 0
textord_debug_pitch_metric 0
textord_show_row_cuts 0
textord_show_page_cuts 0
textord_pitch_cheat 0
textord_blockndoc_fixed 0
textord_show_initial_words 0
textord_show_new_words 0
textord_show_fixed_words 0
textord_blocksall_fixed 0
textord_blocksall_prop 0
textord_blocksall_testing 0
textord_test_mode 0
textord_pitch_scalebigwords 0
textord_restore_underlines 1
textord_fp_chopping 1
textord_force_make_prop_words F
textord_chopper_test 0
wordrec_display_all_blobs 0
wordrec_display_all_words 0
wordrec_blob_pause 0
poly_debug 0
poly_wide_objects_better 1
wordrec_display_splits 0
editor_image_win_name EditorImage
editor_dbwin_name EditorDBWin
editor_word_name BlnWords
editor_debug_config_file
fx_debugfile FXDebug
classify_font_name UnknownFont
classify_training_file MicroFeatures
debug_file
textord_underline_threshold 0.5
edges_childarea 0.5
edges_boxarea 0.875
textord_fp_chop_snap 0.5
gapmap_big_gaps 1.75
textord_spline_shift_fraction 0.02
textord_spline_outlier_fraction 0.1
textord_skew_ile 0.5
textord_skew_lag 0.02
textord_linespace_iqrlimit 0.2
textord_width_limit 8
textord_chop_width 1.5
textord_expansion_factor 1
textord_overlap_x 0.375
textord_minxh 0.25
textord_min_linesize 1.25
textord_excess_blobsize 1.3
textord_occupancy_threshold 0.4
textord_underline_width 2
textord_min_blob_height_fraction 0.75
textord_xheight_mode_fraction 0.4
textord_ascheight_mode_fraction 0.08
textord_descheight_mode_fraction 0.08
textord_ascx_ratio_min 1.25
textord_ascx_ratio_max 1.8
textord_descx_ratio_min 0.25
textord_descx_ratio_max 0.6
textord_xheight_error_margin 0.1
oldbl_xhfract 0.4
oldbl_dot_error_size 1.26
textord_oldbl_jumplimit 0.15
pitsync_joined_edge 0.75
pitsync_offset_freecut_fraction 0.25
textord_tabfind_vertical_text_ratio 0.5
textord_tabfind_aligned_gap_fraction 0.75
textord_tabvector_vertical_gap_fraction 0.5
textord_tabvector_vertical_box_ratio 0.5
textord_projection_scale 0.2
textord_balance_factor 1
textord_wordstats_smooth_factor 0.05
textord_width_smooth_factor 0.1
textord_words_width_ile 0.4
textord_words_maxspace 4
textord_words_default_maxspace 3.5
textord_words_default_minspace 0.6
textord_words_min_minspace 0.3
textord_words_default_nonspace 0.2
textord_words_initial_lower 0.25
textord_words_initial_upper 0.15
textord_words_minlarge 0.75
textord_words_pitchsd_threshold 0.04
textord_words_def_fixed 0.016
textord_words_def_prop 0.09
textord_pitch_rowsimilarity 0.08
words_initial_lower 0.5
words_initial_upper 0.15
words_default_prop_nonspace 0.25
words_default_fixed_space 0.75
words_default_fixed_limit 0.6
textord_words_definite_spread 0.3
textord_spacesize_ratiofp 2.8
textord_spacesize_ratioprop 2
textord_fpiqr_ratio 1.5
textord_max_pitch_iqr 0.2
textord_fp_min_width 0.5
textord_underline_offset 0.1
classify_cp_angle_pad_loose 45
classify_cp_angle_pad_medium 20
classify_cp_angle_pad_tight 10
classify_cp_end_pad_loose 0.5
classify_cp_end_pad_medium 0.5
classify_cp_end_pad_tight 0.5
classify_cp_side_pad_loose 2.5
classify_cp_side_pad_medium 1.2
classify_cp_side_pad_tight 0.6
classify_pp_angle_pad 45
classify_pp_end_pad 0.5
classify_pp_side_pad 2.5
classify_min_slope 0.414214
classify_max_slope 2.41421
classify_norm_adj_midpoint 32
classify_norm_adj_curl 2
classify_pico_feature_length 0.05
ambigs_debug_level 0
tessedit_single_match 0
classify_debug_level 0
classify_norm_method 1
matcher_debug_level 0
matcher_debug_flags 0
classify_learning_debug_level 0
matcher_permanent_classes_min 1
matcher_min_examples_for_prototyping 3
matcher_sufficient_examples_for_prototyping 5
classify_adapt_proto_threshold 230
classify_adapt_feature_threshold 230
classify_class_pruner_threshold 229
classify_class_pruner_multiplier 15
classify_cp_cutoff_strength 7
classify_integer_matcher_multiplier 10
il1_adaption_test 0
dawg_debug_level 0
hyphen_debug_level 0
max_viterbi_list_size 10
stopper_smallword_size 2
stopper_debug_level 0
tessedit_truncate_wordchoice_log 10
fragments_debug 0
max_permuter_attempts 10000
repair_unchopped_blobs 1
chop_debug 0
chop_split_length 10000
chop_same_distance 2
chop_min_outline_points 6
chop_seam_pile_size 150
chop_inside_angle -50
chop_min_outline_area 2000
chop_centered_maxwidth 90
chop_x_y_weight 3
segment_adjust_debug 0
wordrec_debug_level 0
wordrec_max_join_chunks 4
segsearch_debug_level 0
segsearch_max_pain_points 2000
segsearch_max_futile_classifications 20
language_model_debug_level 0
language_model_ngram_order 8
language_model_viterbi_list_max_num_prunable 10
language_model_viterbi_list_max_size 500
language_model_min_compound_length 3
wordrec_display_segmentations 0
tessedit_pageseg_mode 6
tessedit_ocr_engine_mode 0
pageseg_devanagari_split_strategy 0
ocr_devanagari_split_strategy 0
bidi_debug 0
applybox_debug 1
applybox_page 0
tessedit_bigram_debug 0
debug_x_ht_level 0
quality_min_initial_alphas_reqd 2
tessedit_tess_adaption_mode 39
tessedit_test_adaption_mode 3
paragraph_debug_level 0
cube_debug_level 0
tessedit_preserve_min_wd_len 2
crunch_rating_max 10
crunch_pot_indicators 1
crunch_leave_lc_strings 4
crunch_leave_uc_strings 4
crunch_long_repetitions 3
crunch_debug 0
fixsp_non_noise_limit 1
fixsp_done_mode 1
debug_fix_space_level 0
x_ht_acceptance_tolerance 8
x_ht_min_change 8
superscript_debug 0
suspect_level 99
suspect_space_level 100
suspect_short_words 2
tessedit_reject_mode 0
tessedit_image_border 2
min_sane_x_ht_pixels 8
tessedit_page_number -1
tessdata_manager_debug_level 0
tessedit_parallelize 0
tessedit_ok_mode 5
segment_debug 0
language_model_fixed_length_choices_depth 3
tosp_debug_level 0
tosp_enough_space_samples_for_median 3
tosp_redo_kern_limit 10
tosp_few_samples 40
tosp_short_row 20
tosp_sanity_method 1
textord_max_noise_size 7
textord_baseline_debug 0
textord_noise_sizefraction 10
textord_noise_translimit 16
textord_noise_sncount 1
use_definite_ambigs_for_classifier 0
use_ambigs_for_adaption 0
prioritize_division 0
classify_enable_learning 1
tess_cn_matching 0
tess_bn_matching 0
classify_enable_adaptive_matcher 1
classify_use_pre_adapted_templates 0
classify_save_adapted_templates 0
classify_enable_adaptive_debugger 0
classify_nonlinear_norm 0
disable_character_fragments 1
classify_debug_character_fragments 0
matcher_debug_separate_windows 0
classify_bln_numeric_mode 0
load_system_dawg 1
load_freq_dawg 1
load_unambig_dawg 1
load_punc_dawg 1
load_number_dawg 1
load_bigram_dawg 1
use_only_first_uft8_step 0
stopper_no_acceptable_choices 0
save_raw_choices 1
segment_nonalphabetic_script 0
save_doc_words 0
merge_fragments_in_matrix 1
wordrec_no_block 0
wordrec_enable_assoc 1
force_word_assoc 0
fragments_guide_chopper 0
chop_enable T
chop_vertical_creep 0
chop_new_seam_pile 1
assume_fixed_pitch_char_segment 0
wordrec_skip_no_truth_words 0
wordrec_debug_blamer 0
wordrec_run_blamer 0
save_alt_choices 1
language_model_ngram_on 0
language_model_ngram_use_only_first_uft8_step 0
language_model_ngram_space_delimited_language 1
language_model_use_sigmoidal_certainty 0
tessedit_resegment_from_boxes 0
tessedit_resegment_from_line_boxes 0
tessedit_train_from_boxes 0
tessedit_make_boxes_from_boxes 0
tessedit_dump_pageseg_images 0
tessedit_ambigs_training 0
tessedit_adaption_debug 0
applybox_learn_chars_and_char_frags_mode 0
applybox_learn_ngrams_mode 0
tessedit_display_outwords 0
tessedit_training_tess 0
tessedit_dump_choices 0
tessedit_timing_debug 0
tessedit_fix_fuzzy_spaces 1
tessedit_unrej_any_wd 0
tessedit_fix_hyphens 1
tessedit_redo_xheight 1
tessedit_enable_doc_dict 1
tessedit_debug_fonts 0
tessedit_debug_block_rejection 0
tessedit_enable_bigram_correction 1
debug_acceptable_wds 0
tessedit_minimal_rej_pass1 0
tessedit_test_adaption 0
tessedit_matcher_log 0
test_pt 0
paragraph_text_based 1
docqual_excuse_outline_errs 0
tessedit_good_quality_unrej 1
tessedit_use_reject_spaces 1
tessedit_preserve_blk_rej_perfect_wds 1
tessedit_preserve_row_rej_perfect_wds 1
tessedit_dont_blkrej_good_wds 0
tessedit_dont_rowrej_good_wds 0
tessedit_row_rej_good_docs 1
tessedit_reject_bad_qual_wds 1
tessedit_debug_doc_rejection 0
tessedit_debug_quality_metrics 0
bland_unrej 0
unlv_tilde_crunching 1
crunch_early_merge_tess_fails 1
crunch_early_convert_bad_unlv_chs 0
crunch_terrible_garbage 1
crunch_pot_garbage 1
crunch_leave_ok_strings 1
crunch_accept_ok 1
crunch_leave_accept_strings 0
crunch_include_numerals 0
tessedit_prefer_joined_punct 0
tessedit_write_block_separators 0
tessedit_write_rep_codes 0
tessedit_write_unlv 0
tessedit_create_hocr 0
tessedit_create_pdf 0
suspect_constrain_1Il 0
tessedit_minimal_rejection 0
tessedit_zero_rejection 0
tessedit_word_for_word 0
tessedit_zero_kelvin_rejection 0
tessedit_consistent_reps 1
tessedit_rejection_debug 0
tessedit_flip_0O 1
rej_trust_doc_dawg 0
rej_1Il_use_dict_word 0
rej_1Il_trust_permuter_type 1
rej_use_tess_accepted 1
rej_use_tess_blanks 1
rej_use_good_perm 1
rej_use_sensible_wd 0
rej_alphas_in_number_perm 0
tessedit_create_boxfile 0
tessedit_write_images 0
interactive_display_mode 0
tessedit_override_permuter 1
tessedit_use_primary_params_model 0
textord_tabfind_show_vlines 0
textord_use_cjk_fp_model 0
poly_allow_detailed_fx 0
tessedit_init_config_only 0
textord_equation_detect 0
load_fixed_length_dawgs 1
permute_debug 0
permute_script_word 0
segment_segcost_rating F
permute_fixed_length_dawg 0
permute_chartype_word 0
ngram_permuter_activated 0
permute_only_top 0
use_new_state_cost F
enable_new_segsearch 0
textord_single_height_mode 0
tosp_old_to_method 0
tosp_old_to_constrain_sp_kn 0
tosp_only_use_prop_rows 1
tosp_force_wordbreak_on_punct 0
tosp_use_pre_chopping 0
tosp_old_to_bug_fix 0
tosp_block_use_cert_spaces 1
tosp_row_use_cert_spaces 1
tosp_narrow_blobs_not_cert 1
tosp_row_use_cert_spaces1 1
tosp_recovery_isolated_row_stats 1
tosp_only_small_gaps_for_kern 0
tosp_all_flips_fuzzy 0
tosp_fuzzy_limit_all 1
tosp_stats_use_xht_gaps 1
tosp_use_xht_gaps 1
tosp_only_use_xht_gaps 0
tosp_rule_9_test_punct 0
tosp_flip_fuzz_kn_to_sp 1
tosp_flip_fuzz_sp_to_kn 1
tosp_improve_thresh 0
textord_no_rejects 0
textord_show_blobs 0
textord_show_boxes 0
textord_noise_rejwords 1
textord_noise_rejrows 1
textord_noise_debug 0
m_data_sub_dir tessdata/
classify_learn_debug_str
user_words_suffix
user_patterns_suffix
output_ambig_words_file
word_to_debug
word_to_debug_lengths
tessedit_char_blacklist
tessedit_char_whitelist
tessedit_write_params_to_file
applybox_exposure_pattern .exp
chs_leading_punct ('`"
chs_trailing_punct1 ).,;:?!
chs_trailing_punct2 )'`"
outlines_odd %|
outlines_2 ij!?%":;
numeric_punctuation .,
unrecognised_char |
ok_repeated_ch_non_alphanum_wds -?*=
conflict_set_I_l_1 Il1[]
file_type .tif
tessedit_load_sublangs
classify_char_norm_range 0.2
classify_min_norm_scale_x 0
classify_max_norm_scale_x 0.325
classify_min_norm_scale_y 0
classify_max_norm_scale_y 0.325
classify_max_rating_ratio 1.5
classify_max_certainty_margin 5.5
matcher_good_threshold 0.125
matcher_great_threshold 0
matcher_perfect_threshold 0.02
matcher_bad_match_pad 0.15
matcher_rating_margin 0.1
matcher_avg_noise_size 12
matcher_clustering_max_angle_delta 0.015
classify_misfit_junk_penalty 0
rating_scale 1.5
certainty_scale 20
tessedit_class_miss_scale 0.00390625
classify_adapted_pruning_factor 2.5
classify_adapted_pruning_threshold -1
classify_character_fragments_garbage_certainty_threshold -3
speckle_large_max_size 0.3
speckle_rating_penalty 10
xheight_penalty_subscripts 0.125
xheight_penalty_inconsistent 0.25
segment_penalty_dict_frequent_word 1
segment_penalty_dict_case_ok 1.1
segment_penalty_dict_case_bad 1.3125
segment_penalty_ngram_best_choice 1.24
segment_penalty_dict_nonword 1.25
segment_penalty_garbage 1.5
certainty_scale 20
stopper_nondict_certainty_base -2.5
stopper_phase2_certainty_rejection_offset 1
stopper_certainty_per_char -0.5
stopper_allowable_character_badness 3
doc_dict_pending_threshold 0
doc_dict_certainty_threshold -2.25
wordrec_worst_state 1
tessedit_certainty_threshold -2.25
chop_split_dist_knob 0.5
chop_overlap_knob 0.9
chop_center_knob 0.15
chop_sharpness_knob 0.06
chop_width_change_knob 5
chop_ok_split 100
chop_good_split 50
segsearch_max_char_wh_ratio 2
language_model_ngram_small_prob 1e-06
language_model_ngram_nonmatch_score -40
language_model_ngram_scale_factor 0.03
language_model_ngram_rating_factor 16
language_model_penalty_non_freq_dict_word 0.1
language_model_penalty_non_dict_word 0.15
language_model_penalty_punc 0.2
language_model_penalty_case 0.1
language_model_penalty_script 0.5
language_model_penalty_chartype 0.3
language_model_penalty_font 0
language_model_penalty_spacing 0.05
language_model_penalty_increment 0.01
quality_rej_pc 0.08
quality_blob_pc 0
quality_outline_pc 1
quality_char_pc 0.95
test_pt_x 100000
test_pt_y 100000
tessedit_reject_doc_percent 65
tessedit_reject_block_percent 45
tessedit_reject_row_percent 40
tessedit_whole_wd_rej_row_percent 70
tessedit_good_doc_still_rowrej_wd 1.1
quality_rowrej_pc 1.1
crunch_terrible_rating 80
crunch_poor_garbage_cert -9
crunch_poor_garbage_rate 60
crunch_pot_poor_rate 40
crunch_pot_poor_cert -8
crunch_del_rating 60
crunch_del_cert -10
crunch_del_min_ht 0.7
crunch_del_max_ht 3
crunch_del_min_width 3
crunch_del_high_word 1.5
crunch_del_low_word 0.5
crunch_small_outlines_size 0.6
fixsp_small_outlines_size 0.28
superscript_worse_certainty 2
superscript_bettered_certainty 0.97
superscript_scaledown_ratio 0.4
subscript_max_y_top 0.5
superscript_min_y_bottom 0.3
suspect_rating_per_ch 999.9
suspect_accept_rating -999.9
tessedit_lower_flip_hyphen 1.5
tessedit_upper_flip_hyphen 1.8
rej_whole_of_mostly_reject_word_fract 0.85
min_orientation_margin 7
bestrate_pruning_factor 2
segment_reward_script 0.95
segment_reward_chartype 0.97
segment_reward_ngram_best_choice 0.99
heuristic_segcost_rating_base 1.25
heuristic_weight_rating 1
heuristic_weight_width 1000
heuristic_weight_seamcut 0
heuristic_max_char_wh_ratio 2
segsearch_max_fixed_pitch_char_wh_ratio 2
tosp_old_sp_kn_th_factor 2
tosp_threshold_bias1 0
tosp_threshold_bias2 0
tosp_narrow_fraction 0.3
tosp_narrow_aspect_ratio 0.48
tosp_wide_fraction 0.52
tosp_wide_aspect_ratio 0
tosp_fuzzy_space_factor 0.6
tosp_fuzzy_space_factor1 0.5
tosp_fuzzy_space_factor2 0.72
tosp_gap_factor 0.83
tosp_kern_gap_factor1 2
tosp_kern_gap_factor2 1.3
tosp_kern_gap_factor3 2.5
tosp_ignore_big_gaps -1
tosp_ignore_very_big_gaps 3.5
tosp_rep_space 1.6
tosp_enough_small_gaps 0.65
tosp_table_kn_sp_ratio 2.25
tosp_table_xht_sp_ratio 0.33
tosp_table_fuzzy_kn_sp_ratio 3
tosp_fuzzy_kn_fraction 0.5
tosp_fuzzy_sp_fraction 0.5
tosp_min_sane_kn_sp 1.5
tosp_init_guess_kn_mult 2.2
tosp_init_guess_xht_mult 0.28
tosp_max_sane_kn_thresh 5
tosp_flip_caution 0
tosp_large_kerning 0.19
tosp_dont_fool_with_small_kerns -1
tosp_near_lh_edge 0
tosp_silly_kn_sp_gap 0.2
tosp_pass_wide_fuzz_sp_to_context 0.75
textord_blob_size_bigile 95
textord_noise_area_ratio 0.7
textord_blob_size_smallile 20
textord_initialx_ile 0.75
textord_initialasc_ile 0.9
textord_noise_sizelimit 0.5
textord_noise_normratio 2
textord_noise_syfract 0.2
textord_noise_sxfract 0.4
textord_noise_hfract 0.015625
textord_noise_rowratio 6
textord_blshift_maxshift 0
textord_blshift_xfraction 9.99
editor_image_xpos 590
editor_image_ypos 10
editor_image_menuheight 50
editor_image_word_bb_color 7
editor_image_blob_bb_color 4
editor_image_text_color 2
editor_dbwin_xpos 50
editor_dbwin_ypos 500
editor_dbwin_height 24
editor_dbwin_width 80
editor_word_xpos 60
editor_word_ypos 510
editor_word_height 240
editor_word_width 655
textord_debug_tabfind 0
textord_debug_bugs 0
textord_testregion_left -1
textord_testregion_top -1
textord_testregion_right 2147483647
textord_testregion_bottom 2147483647
textord_tabfind_show_partitions 0
devanagari_split_debuglevel 0
edges_max_children_per_outline 40
edges_max_children_layers 5
edges_children_per_grandchild 10
edges_children_count_limit 45
edges_min_nonhole 12
edges_patharea_ratio 40
textord_fp_chop_error 2
textord_tabfind_show_images 0
textord_skewsmooth_offset 4
textord_skewsmooth_offset2 1
textord_test_x -2147483647
textord_test_y -2147483647
textord_min_blobs_in_row 4
textord_spline_minblobs 8
textord_spline_medianwin 6
textord_max_blob_overlaps 4
textord_min_xheight 10
textord_lms_line_trials 12
oldbl_holed_losscount 10
pitsync_linear_version 6
pitsync_fake_depth 1
textord_tabfind_show_strokewidths 0
textord_dotmatrix_gap 3
textord_debug_block 0
textord_pitch_range 2
textord_words_veto_power 5
classify_num_cp_levels 3
equationdetect_save_bi_image 0
equationdetect_save_spt_image 0
equationdetect_save_seed_image 0
equationdetect_save_merged_image 0
textord_debug_images 0
textord_debug_printable 0
textord_space_size_is_variable 0
textord_tabfind_show_initial_partitions 0
textord_tabfind_show_reject_blobs 0
textord_tabfind_show_columns 0
textord_tabfind_show_blocks 0
textord_tabfind_find_tables 1
textord_tabfind_show_color_fit 0
devanagari_split_debugimage 0
textord_show_fixed_cuts 0
edges_use_new_outline_complexity 0
edges_debug 0
edges_children_fix 0
gapmap_debug 0
gapmap_use_ends 0
gapmap_no_isolated_quanta 0
textord_heavy_nr 0
textord_show_initial_rows 0
textord_show_parallel_rows 0
textord_show_expanded_rows 0
textord_show_final_rows 0
textord_show_final_blobs 0
textord_test_landscape 0
textord_parallel_baselines 1
textord_straight_baselines 0
textord_old_baselines 1
textord_old_xheight 0
textord_fix_xheight_bug 1
textord_fix_makerow_bug 1
textord_debug_xheights 0
textord_biased_skewcalc 1
textord_interpolating_skew 1
textord_new_initial_xheight 1
textord_debug_blob 0
textord_really_old_xheight 0
textord_oldbl_debug 0
textord_debug_baselines 0
textord_oldbl_paradef 1
textord_oldbl_split_splines 1
textord_oldbl_merge_parts 1
oldbl_corrfix 1
oldbl_xhfix 0
textord_ocropus_mode 0
textord_tabfind_only_strokewidths 0
textord_tabfind_vertical_text 1
textord_tabfind_force_vertical_text 0
textord_tabfind_vertical_horizontal_mix 1
textord_tabfind_show_initialtabs 0
textord_tabfind_show_finaltabs 0
textord_dump_table_images 0
textord_show_tables 0
textord_tablefind_show_mark 0
textord_tablefind_show_stats 0
textord_tablefind_recognize_tables 0
textord_all_prop 0
textord_debug_pitch_test 0
textord_disable_pitch_test 0
textord_fast_pitch_test 0
textord_debug_pitch_metric 0
textord_show_row_cuts 0
textord_show_page_cuts 0
textord_pitch_cheat 0
textord_blockndoc_fixed 0
textord_show_initial_words 0
textord_show_new_words 0
textord_show_fixed_words 0
textord_blocksall_fixed 0
textord_blocksall_prop 0
textord_blocksall_testing 0
textord_test_mode 0
textord_pitch_scalebigwords 0
textord_restore_underlines 1
textord_fp_chopping 1
textord_force_make_prop_words F
textord_chopper_test 0
wordrec_display_all_blobs 0
wordrec_display_all_words 0
wordrec_blob_pause 0
poly_debug 0
poly_wide_objects_better 1
wordrec_display_splits 0
editor_image_win_name EditorImage
editor_dbwin_name EditorDBWin
editor_word_name BlnWords
editor_debug_config_file
fx_debugfile FXDebug
classify_font_name UnknownFont
classify_training_file MicroFeatures
debug_file
textord_underline_threshold 0.5
edges_childarea 0.5
edges_boxarea 0.875
textord_fp_chop_snap 0.5
gapmap_big_gaps 1.75
textord_spline_shift_fraction 0.02
textord_spline_outlier_fraction 0.1
textord_skew_ile 0.5
textord_skew_lag 0.02
textord_linespace_iqrlimit 0.2
textord_width_limit 8
textord_chop_width 1.5
textord_expansion_factor 1
textord_overlap_x 0.375
textord_minxh 0.25
textord_min_linesize 1.25
textord_excess_blobsize 1.3
textord_occupancy_threshold 0.4
textord_underline_width 2
textord_min_blob_height_fraction 0.75
textord_xheight_mode_fraction 0.4
textord_ascheight_mode_fraction 0.08
textord_descheight_mode_fraction 0.08
textord_ascx_ratio_min 1.25
textord_ascx_ratio_max 1.8
textord_descx_ratio_min 0.25
textord_descx_ratio_max 0.6
textord_xheight_error_margin 0.1
oldbl_xhfract 0.4
oldbl_dot_error_size 1.26
textord_oldbl_jumplimit 0.15
pitsync_joined_edge 0.75
pitsync_offset_freecut_fraction 0.25
textord_tabfind_vertical_text_ratio 0.5
textord_tabfind_aligned_gap_fraction 0.75
textord_tabvector_vertical_gap_fraction 0.5
textord_tabvector_vertical_box_ratio 0.5
textord_projection_scale 0.2
textord_balance_factor 1
textord_wordstats_smooth_factor 0.05
textord_width_smooth_factor 0.1
textord_words_width_ile 0.4
textord_words_maxspace 4
textord_words_default_maxspace 3.5
textord_words_default_minspace 0.6
textord_words_min_minspace 0.3
textord_words_default_nonspace 0.2
textord_words_initial_lower 0.25
textord_words_initial_upper 0.15
textord_words_minlarge 0.75
textord_words_pitchsd_threshold 0.04
textord_words_def_fixed 0.016
textord_words_def_prop 0.09
textord_pitch_rowsimilarity 0.08
words_initial_lower 0.5
words_initial_upper 0.15
words_default_prop_nonspace 0.25
words_default_fixed_space 0.75
words_default_fixed_limit 0.6
textord_words_definite_spread 0.3
textord_spacesize_ratiofp 2.8
textord_spacesize_ratioprop 2
textord_fpiqr_ratio 1.5
textord_max_pitch_iqr 0.2
textord_fp_min_width 0.5
textord_underline_offset 0.1
classify_cp_angle_pad_loose 45
classify_cp_angle_pad_medium 20
classify_cp_angle_pad_tight 10
classify_cp_end_pad_loose 0.5
classify_cp_end_pad_medium 0.5
classify_cp_end_pad_tight 0.5
classify_cp_side_pad_loose 2.5
classify_cp_side_pad_medium 1.2
classify_cp_side_pad_tight 0.6
classify_pp_angle_pad 45
classify_pp_end_pad 0.5
classify_pp_side_pad 2.5
classify_min_slope 0.414214
classify_max_slope 2.41421
classify_norm_adj_midpoint 32
classify_norm_adj_curl 2
classify_pico_feature_length 0.05
ambigs_debug_level 0
tessedit_single_match 0
classify_debug_level 0
classify_norm_method 1
matcher_debug_level 0
matcher_debug_flags 0
classify_learning_debug_level 0
matcher_permanent_classes_min 1
matcher_min_examples_for_prototyping 3
matcher_sufficient_examples_for_prototyping 5
classify_adapt_proto_threshold 230
classify_adapt_feature_threshold 230
classify_class_pruner_threshold 229
classify_class_pruner_multiplier 15
classify_cp_cutoff_strength 7
classify_integer_matcher_multiplier 10
il1_adaption_test 0
dawg_debug_level 0
hyphen_debug_level 0
max_viterbi_list_size 10
stopper_smallword_size 2
stopper_debug_level 0
tessedit_truncate_wordchoice_log 10
fragments_debug 0
max_permuter_attempts 10000
repair_unchopped_blobs 1
chop_debug 0
chop_split_length 10000
chop_same_distance 2
chop_min_outline_points 6
chop_seam_pile_size 150
chop_inside_angle -50
chop_min_outline_area 2000
chop_centered_maxwidth 90
chop_x_y_weight 3
segment_adjust_debug 0
wordrec_debug_level 0
wordrec_max_join_chunks 4
segsearch_debug_level 0
segsearch_max_pain_points 2000
segsearch_max_futile_classifications 20
language_model_debug_level 0
language_model_ngram_order 8
language_model_viterbi_list_max_num_prunable 10
language_model_viterbi_list_max_size 500
language_model_min_compound_length 3
wordrec_display_segmentations 0
tessedit_pageseg_mode 6
tessedit_ocr_engine_mode 0
pageseg_devanagari_split_strategy 0
ocr_devanagari_split_strategy 0
bidi_debug 0
applybox_debug 1
applybox_page 0
tessedit_bigram_debug 0
debug_x_ht_level 0
quality_min_initial_alphas_reqd 2
tessedit_tess_adaption_mode 39
tessedit_test_adaption_mode 3
paragraph_debug_level 0
cube_debug_level 0
tessedit_preserve_min_wd_len 2
crunch_rating_max 10
crunch_pot_indicators 1
crunch_leave_lc_strings 4
crunch_leave_uc_strings 4
crunch_long_repetitions 3
crunch_debug 0
fixsp_non_noise_limit 1
fixsp_done_mode 1
debug_fix_space_level 0
x_ht_acceptance_tolerance 8
x_ht_min_change 8
superscript_debug 0
suspect_level 99
suspect_space_level 100
suspect_short_words 2
tessedit_reject_mode 0
tessedit_image_border 2
min_sane_x_ht_pixels 8
tessedit_page_number -1
tessdata_manager_debug_level 0
tessedit_parallelize 0
tessedit_ok_mode 5
segment_debug 0
language_model_fixed_length_choices_depth 3
tosp_debug_level 0
tosp_enough_space_samples_for_median 3
tosp_redo_kern_limit 10
tosp_few_samples 40
tosp_short_row 20
tosp_sanity_method 1
textord_max_noise_size 7
textord_baseline_debug 0
textord_noise_sizefraction 10
textord_noise_translimit 16
textord_noise_sncount 1
use_definite_ambigs_for_classifier 0
use_ambigs_for_adaption 0
prioritize_division 0
classify_enable_learning 1
tess_cn_matching 0
tess_bn_matching 0
classify_enable_adaptive_matcher 1
classify_use_pre_adapted_templates 0
classify_save_adapted_templates 0
classify_enable_adaptive_debugger 0
classify_nonlinear_norm 0
disable_character_fragments 1
classify_debug_character_fragments 0
matcher_debug_separate_windows 0
classify_bln_numeric_mode 0
load_system_dawg 1
load_freq_dawg 1
load_unambig_dawg 1
load_punc_dawg 1
load_number_dawg 1
load_bigram_dawg 1
use_only_first_uft8_step 0
stopper_no_acceptable_choices 0
save_raw_choices 1
segment_nonalphabetic_script 0
save_doc_words 0
merge_fragments_in_matrix 1
wordrec_no_block 0
wordrec_enable_assoc 1
force_word_assoc 0
fragments_guide_chopper 0
chop_enable T
chop_vertical_creep 0
chop_new_seam_pile 1
assume_fixed_pitch_char_segment 0
wordrec_skip_no_truth_words 0
wordrec_debug_blamer 0
wordrec_run_blamer 0
save_alt_choices 1
language_model_ngram_on 0
language_model_ngram_use_only_first_uft8_step 0
language_model_ngram_space_delimited_language 1
language_model_use_sigmoidal_certainty 0
tessedit_resegment_from_boxes 0
tessedit_resegment_from_line_boxes 0
tessedit_train_from_boxes 0
tessedit_make_boxes_from_boxes 0
tessedit_dump_pageseg_images 0
tessedit_ambigs_training 0
tessedit_adaption_debug 0
applybox_learn_chars_and_char_frags_mode 0
applybox_learn_ngrams_mode 0
tessedit_display_outwords 0
#tessedit_training_tess 0
tessedit_dump_choices 0
tessedit_timing_debug 0
tessedit_fix_fuzzy_spaces 1
tessedit_unrej_any_wd 0
tessedit_fix_hyphens 1
tessedit_redo_xheight 1
tessedit_enable_doc_dict 1
tessedit_debug_fonts 0
tessedit_debug_block_rejection 0
tessedit_enable_bigram_correction 1
debug_acceptable_wds 0
tessedit_minimal_rej_pass1 0
tessedit_test_adaption 0
tessedit_matcher_log 0
test_pt 0
paragraph_text_based 1
docqual_excuse_outline_errs 0
tessedit_good_quality_unrej 1
tessedit_use_reject_spaces 1
tessedit_preserve_blk_rej_perfect_wds 1
tessedit_preserve_row_rej_perfect_wds 1
tessedit_dont_blkrej_good_wds 0
tessedit_dont_rowrej_good_wds 0
tessedit_row_rej_good_docs 1
tessedit_reject_bad_qual_wds 1
tessedit_debug_doc_rejection 0
tessedit_debug_quality_metrics 0
bland_unrej 0
unlv_tilde_crunching 1
crunch_early_merge_tess_fails 1
crunch_early_convert_bad_unlv_chs 0
crunch_terrible_garbage 1
crunch_pot_garbage 1
crunch_leave_ok_strings 1
crunch_accept_ok 1
crunch_leave_accept_strings 0
crunch_include_numerals 0
tessedit_prefer_joined_punct 0
tessedit_write_block_separators 0
tessedit_write_rep_codes 0
tessedit_write_unlv 0
tessedit_create_hocr 0
tessedit_create_pdf 0
suspect_constrain_1Il 0
tessedit_minimal_rejection 0
tessedit_zero_rejection 0
tessedit_word_for_word 0
tessedit_zero_kelvin_rejection 0
tessedit_consistent_reps 1
tessedit_rejection_debug 0
tessedit_flip_0O 1
rej_trust_doc_dawg 0
rej_1Il_use_dict_word 0
rej_1Il_trust_permuter_type 1
rej_use_tess_accepted 1
rej_use_tess_blanks 1
rej_use_good_perm 1
rej_use_sensible_wd 0
rej_alphas_in_number_perm 0
tessedit_create_boxfile 0
tessedit_write_images 0
interactive_display_mode 0
tessedit_override_permuter 1
tessedit_use_primary_params_model 0
textord_tabfind_show_vlines 0
textord_use_cjk_fp_model 0
poly_allow_detailed_fx 0
tessedit_init_config_only 0
textord_equation_detect 0
load_fixed_length_dawgs 1
permute_debug 0
permute_script_word 0
segment_segcost_rating F
permute_fixed_length_dawg 0
permute_chartype_word 0
ngram_permuter_activated 0
permute_only_top 0
use_new_state_cost F
enable_new_segsearch 0
textord_single_height_mode 0
tosp_old_to_method 0
tosp_old_to_constrain_sp_kn 0
tosp_only_use_prop_rows 1
tosp_force_wordbreak_on_punct 0
tosp_use_pre_chopping 0
tosp_old_to_bug_fix 0
tosp_block_use_cert_spaces 1
tosp_row_use_cert_spaces 1
tosp_narrow_blobs_not_cert 1
tosp_row_use_cert_spaces1 1
tosp_recovery_isolated_row_stats 1
tosp_only_small_gaps_for_kern 0
tosp_all_flips_fuzzy 0
tosp_fuzzy_limit_all 1
tosp_stats_use_xht_gaps 1
tosp_use_xht_gaps 1
tosp_only_use_xht_gaps 0
tosp_rule_9_test_punct 0
tosp_flip_fuzz_kn_to_sp 1
tosp_flip_fuzz_sp_to_kn 1
tosp_improve_thresh 0
textord_no_rejects 0
textord_show_blobs 0
textord_show_boxes 0
textord_noise_rejwords 1
textord_noise_rejrows 1
textord_noise_debug 0
m_data_sub_dir tessdata/
classify_learn_debug_str
user_words_suffix
user_patterns_suffix
output_ambig_words_file
word_to_debug
word_to_debug_lengths
tessedit_char_blacklist
tessedit_char_whitelist
tessedit_write_params_to_file
applybox_exposure_pattern .exp
chs_leading_punct ('`"
chs_trailing_punct1 ).,;:?!
chs_trailing_punct2 )'`"
outlines_odd %|
outlines_2 ij!?%":;
numeric_punctuation .,
unrecognised_char |
ok_repeated_ch_non_alphanum_wds -?*=
conflict_set_I_l_1 Il1[]
file_type .tif
tessedit_load_sublangs
classify_char_norm_range 0.2
classify_min_norm_scale_x 0
classify_max_norm_scale_x 0.325
classify_min_norm_scale_y 0
classify_max_norm_scale_y 0.325
classify_max_rating_ratio 1.5
classify_max_certainty_margin 5.5
matcher_good_threshold 0.125
#matcher_great_threshold 0
matcher_perfect_threshold 0.02
matcher_bad_match_pad 0.15
matcher_rating_margin 0.1
matcher_avg_noise_size 12
matcher_clustering_max_angle_delta 0.015
classify_misfit_junk_penalty 0
rating_scale 1.5
certainty_scale 20
tessedit_class_miss_scale 0.00390625
classify_adapted_pruning_factor 2.5
classify_adapted_pruning_threshold -1
classify_character_fragments_garbage_certainty_threshold -3
speckle_large_max_size 0.3
speckle_rating_penalty 10
xheight_penalty_subscripts 0.125
xheight_penalty_inconsistent 0.25
segment_penalty_dict_frequent_word 1
segment_penalty_dict_case_ok 1.1
segment_penalty_dict_case_bad 1.3125
segment_penalty_ngram_best_choice 1.24
segment_penalty_dict_nonword 1.25
segment_penalty_garbage 1.5
certainty_scale 20
stopper_nondict_certainty_base -2.5
stopper_phase2_certainty_rejection_offset 1
stopper_certainty_per_char -0.5
stopper_allowable_character_badness 3
doc_dict_pending_threshold 0
doc_dict_certainty_threshold -2.25
wordrec_worst_state 1
tessedit_certainty_threshold -2.25
chop_split_dist_knob 0.5
chop_overlap_knob 0.9
chop_center_knob 0.15
chop_sharpness_knob 0.06
chop_width_change_knob 5
chop_ok_split 100
chop_good_split 50
segsearch_max_char_wh_ratio 2
language_model_ngram_small_prob 1e-06
language_model_ngram_nonmatch_score -40
language_model_ngram_scale_factor 0.03
language_model_ngram_rating_factor 16
language_model_penalty_non_freq_dict_word 0.1
language_model_penalty_non_dict_word 0.15
language_model_penalty_punc 0.2
language_model_penalty_case 0.1
language_model_penalty_script 0.5
language_model_penalty_chartype 0.3
language_model_penalty_font 0
language_model_penalty_spacing 0.05
language_model_penalty_increment 0.01
quality_rej_pc 0.08
quality_blob_pc 0
quality_outline_pc 1
quality_char_pc 0.95
test_pt_x 100000
test_pt_y 100000
tessedit_reject_doc_percent 65
tessedit_reject_block_percent 45
tessedit_reject_row_percent 40
tessedit_whole_wd_rej_row_percent 70
tessedit_good_doc_still_rowrej_wd 1.1
quality_rowrej_pc 1.1
crunch_terrible_rating 80
crunch_poor_garbage_cert -9
crunch_poor_garbage_rate 60
crunch_pot_poor_rate 40
crunch_pot_poor_cert -8
crunch_del_rating 60
crunch_del_cert -10
crunch_del_min_ht 0.7
crunch_del_max_ht 3
crunch_del_min_width 3
crunch_del_high_word 1.5
crunch_del_low_word 0.5
crunch_small_outlines_size 0.6
fixsp_small_outlines_size 0.28
superscript_worse_certainty 2
superscript_bettered_certainty 0.97
superscript_scaledown_ratio 0.4
subscript_max_y_top 0.5
superscript_min_y_bottom 0.3
suspect_rating_per_ch 999.9
suspect_accept_rating -999.9
tessedit_lower_flip_hyphen 1.5
tessedit_upper_flip_hyphen 1.8
rej_whole_of_mostly_reject_word_fract 0.85
min_orientation_margin 7
bestrate_pruning_factor 2
segment_reward_script 0.95
segment_reward_chartype 0.97
segment_reward_ngram_best_choice 0.99
heuristic_segcost_rating_base 1.25
heuristic_weight_rating 1
heuristic_weight_width 1000
heuristic_weight_seamcut 0
heuristic_max_char_wh_ratio 2
segsearch_max_fixed_pitch_char_wh_ratio 2
tosp_old_sp_kn_th_factor 2
tosp_threshold_bias1 0
tosp_threshold_bias2 0
tosp_narrow_fraction 0.3
tosp_narrow_aspect_ratio 0.48
tosp_wide_fraction 0.52
tosp_wide_aspect_ratio 0
tosp_fuzzy_space_factor 0.6
tosp_fuzzy_space_factor1 0.5
tosp_fuzzy_space_factor2 0.72
tosp_gap_factor 0.83
tosp_kern_gap_factor1 2
tosp_kern_gap_factor2 1.3
tosp_kern_gap_factor3 2.5
tosp_ignore_big_gaps -1
tosp_ignore_very_big_gaps 3.5
tosp_rep_space 1.6
tosp_enough_small_gaps 0.65
tosp_table_kn_sp_ratio 2.25
tosp_table_xht_sp_ratio 0.33
tosp_table_fuzzy_kn_sp_ratio 3
tosp_fuzzy_kn_fraction 0.5
tosp_fuzzy_sp_fraction 0.5
tosp_min_sane_kn_sp 1.5
tosp_init_guess_kn_mult 2.2
tosp_init_guess_xht_mult 0.28
tosp_max_sane_kn_thresh 5
tosp_flip_caution 0
tosp_large_kerning 0.19
tosp_dont_fool_with_small_kerns -1
tosp_near_lh_edge 0
tosp_silly_kn_sp_gap 0.2
tosp_pass_wide_fuzz_sp_to_context 0.75
textord_blob_size_bigile 95
textord_noise_area_ratio 0.7
textord_blob_size_smallile 20
textord_initialx_ile 0.75
textord_initialasc_ile 0.9
textord_noise_sizelimit 0.5
textord_noise_normratio 2
textord_noise_syfract 0.2
textord_noise_sxfract 0.4
textord_noise_hfract 0.015625
textord_noise_rowratio 6
textord_blshift_maxshift 0
textord_blshift_xfraction 9.99
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment