一个简单的文本查询程序——摘自《C++ Primer》

这是在《C++ Primer》上第十章最后的一个小节。以前把这里漏掉了,刚才看了下,觉得这个程序很不错,便于对vector, map, set的基本掌握。特地把这一个小程序记录下来。

/*

目的:一个简单的文本查询程序

作用:程序将读取用户指定的任意文本文件,然后允许用户从该文件中查找单词。

查询的结果是该单词出现的次数,并列出每次出现所在的行。

如果某单词在同一行中多次出现,程序将只显示该行一次。

行号按升序显示,即第 7 行应该在第 9 行之前输出,依此类推。

*/

/* 思路:

1.使用一个 vector 类型的对象存储整个输入文件的副本。

输入文件的每一行是该 vector 对象的一个元素。

因而,在希望输出某一行时,只需以行号为下标获取该行所在的元素即可。

2.将每个单词所在的行号存储在一个 set 容器对象中。

使用 set 就可确保每行只有一个条目,而且行号将自动按升序排列。

3.使用一个 map 容器将每个单词与一个 set 容器对象关联起来,

该 set 容器对象记录此单词所在的行号。

*/

TextQuery.h 文件

#ifndef TEXTQUERY_H
#define TEXTQUERY_H

#include <string>
#include <vector>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <cctype>
#include <cstring>

// TextQuery keeps a copy of an input file, one element per line, together
// with a map from each word to the set of line numbers on which it occurs.
class TextQuery {
public:
    // typedefs to make declarations easier
    typedef std::string::size_type str_size;
    typedef std::vector<std::string>::size_type line_no;

    /* interface:
     *   read_file  builds internal data structures for the given file
     *   run_query  finds the given word and returns the set of lines on
     *              which it appears
     *   text_line  returns a requested line from the input file
     */

    // Stores every line of `is`, then indexes the words of the stored lines.
    void read_file(std::ifstream& is)
    {
        store_file(is);
        build_map();
    }

    std::set<line_no> run_query(const std::string&) const;

    std::string text_line(line_no) const;

    // Number of lines currently stored.
    str_size size() const { return lines_of_text.size(); }

    void display_map();  // debugging aid: print the map

private:
    // utility functions used by read_file
    void store_file(std::ifstream&);  // store input file
    void build_map();  // associate each word with a set of line numbers

    // remember the whole input file
    std::vector<std::string> lines_of_text;

    // map word to set of the lines on which it occurs
    std::map<std::string, std::set<line_no> > word_map;

    // characters that constitute whitespace
    static std::string whitespace_chars;

    // canonicalizes text: removes punctuation and makes everything lower case
    static std::string cleanup_str(const std::string&);
};

#endif

TextQuery.CPP 文件

// Class declaration first, then the standard headers this file relies on.
#include "TextQuery.h"

#include <sstream>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <cctype>
#include <cstring>
#include <stdexcept>



// Standard-library names used unqualified in this file.
using std::istringstream;
using std::set;
using std::string;
using std::getline;
using std::map;
using std::vector;
using std::cerr;
using std::cout;
using std::cin;
using std::ifstream;
using std::endl;
using std::ispunct;
using std::tolower;
using std::strlen;
using std::out_of_range;



stringTextQuery::text_line(line_no line)const

{

if(line<lines_of_text.size())

returnlines_of_text[line];

throwstd::out_of_range("line number out of range");

}



//read input file: store each line as element in lines_of_text

voidTextQuery::store_file(ifstream&is)

{

stringtextline;

while(getline(is, textline))

lines_of_text.push_back(textline);

}



//\v: vertical tab; \f: formfeed; \r: carriage return are

//treated as whitespace characters along with space, tab and newline

stringTextQuery::whitespace_chars("\t\n\v\r\f");



//finds whitespace-separated words in the input vector

//and puts the word in word_map along with the line number

voidTextQuery::build_map()

{

//process each line from the input vector

for(line_no line_num=0;

line_num
!=lines_of_text.size();

++line_num)

{

//we'll use line to read the text a word at a time

istringstream line(lines_of_text[line_num]);

stringword;

while(line>>word)

//add this line number to the set;

//subscript will add word to the map if it's not already there

word_map[cleanup_str(word)].insert(line_num);

}

}



set<TextQuery::line_no>

TextQuery::run_query(
conststring&query_word)const

{

//Note: must use find and not subscript the map directly

//to avoid adding words to word_map!

map<string,set<line_no>>::const_iterator

loc
=word_map.find(cleanup_str(query_word));

if(loc==word_map.end())

returnset<line_no>();//not found, return empty set

else

//fetch and return set of line numbers for this word

returnloc->second;

}



voidTextQuery::display_map()

{

map
<string,set<line_no>>::iterator iter=word_map.begin(),

iter_end
=word_map.end();



//for each word in the map

for( ; iter!=iter_end;++iter) {

cout
<<"word:"<<iter->first<<"{";



//fetch location vector as a const reference to avoid copying it

constset<line_no>&text_locs=iter->second;

set<line_no>::const_iterator loc_iter=text_locs.begin(),

loc_iter_end
=text_locs.end();



//print all line numbers for this word

while(loc_iter!=loc_iter_end)

{

cout
<<loc_iter;



if(++loc_iter!=loc_iter_end)

cout
<<",";



}



cout
<<"}\n";//end list of output this word

}

cout
<<endl;//finished printing entire map

}





//lower-case to upper-case

stringTextQuery::cleanup_str(conststring&word)

{

stringret;

for(string::const_iterator it=word.begin(); it!=word.end();++it) {

if(!ispunct(
it))

ret
+=tolower(*it);

}

returnret;

}

主函数

// Class declaration first, then the standard headers this file relies on.
#include "TextQuery.h"

#include <string>
#include <vector>
#include <map>
#include <set>
#include <iostream>
#include <fstream>
#include <cctype>
#include <cstring>
#include <cstdlib>



// Standard-library names used unqualified in this file.
using std::set;
using std::string;
using std::map;
using std::vector;
using std::cerr;
using std::cout;
using std::cin;
using std::ifstream;
using std::endl;

// Helpers defined after main.
string make_plural(size_t, const string&, const string&);
ifstream& open_file(ifstream&, const string&);



voidprint_results(constset<TextQuery::line_no>&locs,

conststring&sought,constTextQuery&file)

{

//if the word was found, then print count and all occurrences

typedefset<TextQuery::line_no>line_nums;

line_nums::size_type size
=locs.size();

cout
<<"\n"<<sought<<"occurs"

<<size<<""

<<make_plural(size,"time","s")<<endl;



//print each line in which the word appeared

line_nums::const_iterator it=locs.begin();

for( ; it!=locs.end();++it) {

cout
<<"\t(line"

//don't confound user with text lines starting at 0

<<(it)+1<<")"

<<file.text_line(
it)<<endl;

}

}





//program takes single argument specifying the file to query

intmain()

{

//open the file from which user will query words

ifstream infile;

if(!open_file(infile,"Tanky_Woo.txt")) {

cerr
<<"No input file!"<<endl;

returnEXIT_FAILURE;

}



TextQuery tq;

tq.read_file(infile);
//builds query map



//iterate with the user: prompt for a word to find and print results

//loop indefinitely; the loop exit is inside the while

while(true) {

cout
<<"enter word to look for, or q to quit:";

strings;

cin
>>s;



//stop if hit eof on input or a 'q' is entered

if(!cin||s=="q")break;



//get the set of line numbers on which this word appears

set<TextQuery::line_no>locs=tq.run_query(s);



//print count and all occurrences, if any

print_results(locs, s, tq);

}

return0;

}



// Returns `word` unchanged when `ctr` is exactly 1, otherwise `word` with
// `ending` appended (so a count of 0 is treated as plural).
std::string make_plural(std::size_t ctr, const std::string& word,
                        const std::string& ending)
{
    return (ctr == 1) ? word : word + ending;
}



// Opens `file` on the stream `in` and returns that same stream, so the caller
// can test the result directly: the stream is in a good state if the open
// succeeded and in a failed state otherwise.
std::ifstream& open_file(std::ifstream& in, const std::string& file)
{
    in.close();  // close in case it was already open
    in.clear();  // clear any existing errors (including one set by close)

    // if the open fails, the stream will be in an invalid state
    in.open(file.c_str());  // open the file we were given

    return in;  // condition state is good if open succeeded
}

原文链接: https://www.cnblogs.com/tanky_woo/archive/2010/11/11/1875168.html

欢迎关注

微信关注下方公众号,第一时间获取干货硬货;公众号内回复【pdf】免费获取数百本计算机经典书籍

原创文章受到原创版权保护。转载请注明出处:https://www.ccppcoding.com/archives/17218

非原创文章文中已经注明原地址,如有侵权,联系删除

关注公众号【高性能架构探索】,第一时间获取最新文章

转载文章受原作者版权保护。转载请注明原作者出处!

(0)
上一篇 2023年2月7日 下午5:45
下一篇 2023年2月7日 下午5:46

相关推荐