00001
00011
00012 #include "buffer.h"
00013
00014 namespace ace {
00015
00016
00017
00018 void Buffer::_init(void) {
00019
00020 _push();
00021
00022 if ( _context_window.radius != 0 ) {
00023
00024 _push(_context_window.radius, _context_window.mode);
00025 }
00026
00027
00028 _current = _sentences.begin();
00029 }
00030
00031
00032
00033 size_t Buffer::_pop(void) {
00034
00035 size_t dropped_sentence_length = _sentences.front().second;
00036
00037 _words.erase(_words.begin(), _words.begin() + dropped_sentence_length);
00038
00039 _sentences.pop_front();
00040
00041 for ( sentences_indices_t::iterator iter = _sentences.begin(); iter != _sentences.end(); ++iter ) {
00042 iter->first -= dropped_sentence_length;
00043 }
00044 return dropped_sentence_length;
00045 }
00046
00047
00048
00049 void Buffer::_push(size_t needed, ContextWindow::Mode context_window_mode) {
00050
00051 size_t index = _words.size();
00052
00053 if ( _parser.next(needed, context_window_mode, _words) ) {
00054
00055 const Parser::parsed_counter_t& parsed = _parser.parsed();
00056 for ( size_t i = 0; i < parsed.size(); ++i ) {
00057
00058 _sentences.push_back(sentence_index_t(index, parsed[i]));
00059 index += parsed[i];
00060
00061 ++_stats.sentences;
00062 _stats.words += parsed[i];
00063 }
00064 }
00065 }
00066
00067
00068
00069 words_range_t Buffer::context_range(words_store_t::const_iterator left, words_store_t::const_iterator right) const {
00070
00071 size_t left_index = _index_of(left), right_index = _index_of(right);
00072 words_store_t::const_iterator first, second;
00073 if ( left_index >= _context_window.radius ) {
00074 first = _words.begin() + (left_index - _context_window.radius);
00075 } else {
00076 first = _words.begin();
00077 }
00078 if ( (_words.size() - (right_index + 1)) >= _context_window.radius ) {
00079 second = _words.end() - (_words.size() - (right_index + 1 + _context_window.radius));
00080 } else {
00081 second = _words.end();
00082 }
00083 return words_range_t(first, second);
00084 }
00085
00086
00087
00088 bool Buffer::next(void) {
00089 if ( empty() ) {
00090
00091 return false;
00092 }
00093
00094
00095
00096 if ( !_parser.eof() ) {
00097
00098 if ( _context_window.mode == ContextWindow::SENTENCE_MODE ) {
00099
00100 _push();
00101 } else {
00102
00103 sentences_indices_t::iterator next_one = _current; ++next_one;
00104
00105
00106
00107
00108
00109 size_t reserve = (_words.size() - (next_one->first + next_one->second));
00110 size_t needed = (_context_window.radius > reserve) ? (_context_window.radius - reserve) : 0;
00111 if ( needed > 0 ) {
00112 _push(needed, ContextWindow::WORD_MODE);
00113 }
00114 }
00115 }
00116
00117
00118 ++_current;
00119 if ( _current == _sentences.end() ) {
00120
00121 return false;
00122 }
00123
00124
00125
00126 if ( _context_window.mode == ContextWindow::SENTENCE_MODE ) {
00127
00128 _pop();
00129 } else {
00130
00131 if ( _index_of(current().first) > _context_window.radius ) {
00132
00133 size_t reserve = _index_of(current().first) - _context_window.radius;
00134 while ( reserve > _sentences.front().second ) {
00135
00136
00137 reserve -= _pop();
00138 }
00139 }
00140
00141 }
00142
00143 return true;
00144 }
00145
00146 words_range_t Buffer::current(void) const {
00147
00148 return words_range_t(_words.begin() + _current->first, _words.begin() + _current->first + _current->second);
00149 }
00150
00151 }