9 #include "ParseText.hh"
12 #include "stemming/english_stem.h"
13 #include <ext/hash_map>
// NOTE(review): credential-like string committed in source. Presumably the
// license key sent with requests to GOOGLEURL — confirm, and consider
// loading it from configuration instead of hard-coding it.
19 #define KEY "gBZjV4BQFHLsnOnD72hRRtBI50aWolQA"
// Service endpoint URL (plain http, not https).
22 #define GOOGLEURL "http://api.google.com/search/beta2"
// Action identifier string. Presumably the SOAP action paired with
// GOOGLEURL — TODO confirm at the call site.
23 #define GOOGLEACTION "urn:GoogleSearchAction"
// Declaration only; the definition is not part of this listing.
// Takes a query string, three char*** parameters (titles, snippets, URLs)
// and a maximum count; returns an int.
// NOTE(review): the triple pointers look like out-parameters that receive
// arrays of C strings. Who allocates/frees them, and what the int return
// value means, cannot be told from here — confirm against the definition.
28 int doSearch(
char* pcQuery,
char*** pppcTitles,
char*** pppcSnippets,
char*** pppcURLs,
int iMaxNumber);
// Integer threshold, value 2. Presumably the minimum number of times a word
// must occur to be kept — TODO confirm at the use sites.
// The identifier spelling ("OCCURENCE") is left as-is; code outside this
// listing may reference it.
32 static const int MIN_WORD_OCCURENCE=2;
// Relative path "./tmp.html". Presumably a scratch file for downloaded
// pages — confirm. Note: pointer-to-const, but the pointer itself is not const.
33 static const char* pcFileName =
"./tmp.html";
// Timeout value 7; the unit is not shown here (presumably seconds) — TODO confirm.
34 static const int TIMEOUT = 7;
// Name string "google". Presumably the database targeted by writeToDB — confirm.
35 static const char* DATABASE =
"google";
// Declaration only. Takes a CDataset<float> pointer, a query string and a
// result limit that defaults to 500 when omitted; returns nothing.
// NOTE(review): presumably fills pDataset with features derived from the
// search results for pcQuery — confirm against the definition.
45 void computeMV(
CDataset<float>* pDataset,
char* pcQuery,
int iMaxResults=500);
// Declaration only. Accepts a boolean HTML-filtering flag; returns nothing.
// NOTE(review): presumably switches HTML tag removal on or off for fetched
// pages — the effect is not visible here; confirm.
52 void setHtmlFiltering(
bool bHtmlFiltering);
// Declaration only. Takes a query string and a result limit; returns nothing.
// NOTE(review): "BOW" presumably stands for bag-of-words (the listing also
// declares a vector<BagOfWords>) — confirm what state this populates.
55 void createBOW(
char* pcQuery,
int iMaxResults);
// Declaration only. Takes a URL as a C string and a pointer to a string
// destination; returns an int.
// NOTE(review): presumably downloads the page at pcURL into *psDest and
// returns a status code — the meaning of the return value is not shown; confirm.
62 int readWebSite(
char* pcURL,
string* psDest);
// Declaration only. Takes an integer ID plus title, text and URL C strings;
// returns an int.
// NOTE(review): presumably stores one record per ID in a database and
// returns a status code — storage target and return semantics are not
// visible here; confirm.
65 int writeToDB(
int ID,
char* pcTitles,
char* pcText,
char* pcURL);
// Declaration only. Takes a URL as a C string; returns an int.
// NOTE(review): the name could mean either "save the site" or "is the site
// safe" — the purpose and return value cannot be determined from this
// listing; check the definition.
68 int safeSite(
char* pcURL);
// Declaration only. Takes a pointer to a string destination; returns an int.
// NOTE(review): no source argument is passed, so the content presumably
// comes from state or a file not shown here — confirm.
71 int site2String(
string* psDest);
// Declaration only. Takes a pointer to a string and an integer start value;
// returns nothing.
// NOTE(review): presumably strips HTML tags from *psTemp in place, beginning
// at offset iStart — confirm against the definition.
77 void rmHTMLTags(
string* psTemp,
int iStart);
// Data declarations. vector, string and pair are used unqualified, so a
// using-directive or using-declarations must be in effect in lines not
// shown in this listing.

// Sequence of BagOfWords objects.
80 vector<BagOfWords> vBags;
// Sequence of int pairs. NOTE(review): the meaning of each component
// (e.g. word id / count) is not visible here — confirm.
88 vector< pair<int,int> > vpDictWords;
// Sequence of int -> int hash maps.
// NOTE(review): stdext::hash_map is the MSVC spelling, whereas this listing
// includes the GNU header <ext/hash_map> (namespace __gnu_cxx). Presumably a
// namespace alias exists in lines not shown — verify.
92 vector< stdext::hash_map<int,int> > vhIDBags;
// Sequence of strings. Presumably indexed by an ID to recover its text — confirm.
95 vector<string> vsID2String;
// int -> int hash map. Presumably maps an ID to a compact index — confirm.
98 stdext::hash_map<int,int> hID2Comp;
// Sequence of strings. Presumably indexed by the compact index from hID2Comp — confirm.
101 vector<string> vsComp2String;
int doSearch(char *pcQuery, char ***pppcTitles, char ***pppcSnippets, char ***pppcURLs, int iMaxNumber)
Definition: mvGenerator.h:37