00001 00009 #include <cstdlib> 00010 00011 #include "config.h" 00012 00013 namespace ace { 00014 00020 size_t _ascii2size_t(const char *str) { 00021 int i = atoi(str); 00022 return (i < 0) ? 0 : i; 00023 } 00024 00025 void parse_params(int argc, char** argv) { 00026 for ( int i = 0; i < argc; ++i ) { 00027 if ( argv[i][0] == '-' ) { 00028 switch ( argv[i][1] ) { 00029 case 'c': 00030 // Context window. 00031 switch ( argv[i][2] ) { 00032 case 'f': 00033 settings::context_filter_file = argv[++i]; 00034 break; 00035 case 's': 00036 settings::context_window.mode = ContextWindow::SENTENCE_MODE; 00037 settings::context_window.radius = _ascii2size_t(argv[++i]); 00038 break; 00039 // case 't': 00040 // settings::context_tag_mask = argv[++i]; 00041 // break; 00042 case 'w': 00043 settings::context_window.mode = ContextWindow::WORD_MODE; 00044 settings::context_window.radius = _ascii2size_t(argv[++i]); 00045 break; 00046 case constants::null_char: 00047 // Output file. 00048 settings::context_output_file = argv[++i]; 00049 break; 00050 } 00051 break; 00052 case 'd': 00053 // Directory with input datafiles. 00054 settings::dir = argv[++i]; 00055 break; 00056 case 'f': 00057 // Morphologic filter... 00058 switch ( argv[i][2] ) { 00059 case 's': 00060 // ...stats (output) filename. 00061 settings::morphologic_filter_stats_file = argv[++i]; 00062 break; 00063 case constants::null_char: 00064 // ...rules (input) filename. 00065 settings::morphologic_filter_file = argv[++i]; 00066 break; 00067 } 00068 break; 00069 case 'i': 00070 // Input filename. 00071 settings::input_file = argv[++i]; 00072 break; 00073 case 'm': 00074 // Bitmask for morphologic tags. 00075 settings::morphologic_tag_mask = argv[++i]; 00076 break; 00077 case 'n': 00078 // N for N-gram. 00079 settings::n = static_cast<ngram_size_t>(_ascii2size_t(argv[++i])); 00080 break; 00081 case 'o': 00082 // Output filename. 00083 settings::output_file = argv[++i]; 00084 break; 00085 case 'p': 00086 // Output statistics precision. 00087 settings::precision = _ascii2size_t(argv[++i]); 00088 break; 00089 case 's': 00090 // Stats output filename. 00091 settings::stats_file = argv[++i]; 00092 break; 00093 case 't': 00094 // Threshold (for pre-evaluation or post-evaluation filter) 00095 switch ( argv[i][2] ) { 00096 case 'a': 00097 settings::all_thresholds_together = true; 00098 break; 00099 case 'c': 00100 settings::thresholds.chi_square_test = atof(argv[++i]); 00101 break; 00102 case 'e': 00103 settings::sieves.expected_frequency = atof(argv[++i]); 00104 break; 00105 case 'f': 00106 settings::sieves.frequency = _ascii2size_t(argv[++i]); 00107 break; 00108 case 'l': 00109 settings::thresholds.log_likelihood_ratio = atof(argv[++i]); 00110 break; 00111 case 'm': 00112 settings::thresholds.mutual_information = atof(argv[++i]); 00113 break; 00114 case 'o': 00115 settings::all_thresholds_together = false; 00116 break; 00117 case 'p': 00118 settings::thresholds.pearsons_coefficient = atof(argv[++i]); 00119 break; 00120 case 't': 00121 settings::thresholds.t_test = atof(argv[++i]); 00122 break; 00123 case 'z': 00124 settings::thresholds.z_score = atof(argv[++i]); 00125 break; 00126 } 00127 break; 00128 // TODO: Implement or drop. 00129 // case 'v': 00130 // // Verbose. 00131 // settings::verbose = true; 00132 // break; 00133 #ifdef _SURFACE_MODE 00134 case 'w': 00135 // Collocation window size. 00136 settings::collocation_window_size = _ascii2size_t(argv[++i]); 00137 break; 00138 #endif 00139 #ifndef _USE_HASHSET 00140 case 'x': 00141 // Performance-related settings. 00142 switch ( argv[i][2] ) { 00143 case 'b': 00144 settings::bucket_size = _ascii2size_t(argv[++i]); 00145 break; 00146 case 'c': 00147 settings::crop_ratio = _ascii2size_t(argv[++i]); 00148 break; 00149 case 's': 00150 settings::sort = (_ascii2size_t(argv[++i]) != 0); 00151 break; 00152 } 00153 #endif 00154 default: 00155 // TODO: Warn? 00156 ; 00157 } 00158 } 00159 } 00160 } 00161 00162 } // namespace ace
1.5.6