From e8b1808eaf87a49e4c34ebbfb66854baa627418c Mon Sep 17 00:00:00 2001
From: Tucker Evans <tuckerevans24@gmail.com>
Date: Mon, 18 Feb 2019 07:35:54 -0500
Subject: Moves assignments to given course folder.

---
 CSC2636/search/.gitignore     |   6 +
 CSC2636/search/README.rst     |  19 ++
 CSC2636/search/assign.rst     | 213 ++++++++++++++++++++++
 CSC2636/search/index/index.go | 165 +++++++++++++++++
 CSC2636/search/indexer.go     | 402 ++++++++++++++++++++++++++++++++++++++++++
 CSC2636/search/search.go      | 144 +++++++++++++++
 6 files changed, 949 insertions(+)
 create mode 100644 CSC2636/search/.gitignore
 create mode 100644 CSC2636/search/README.rst
 create mode 100644 CSC2636/search/assign.rst
 create mode 100644 CSC2636/search/index/index.go
 create mode 100644 CSC2636/search/indexer.go
 create mode 100644 CSC2636/search/search.go

(limited to 'CSC2636/search')

diff --git a/CSC2636/search/.gitignore b/CSC2636/search/.gitignore
new file mode 100644
index 0000000..7523492
--- /dev/null
+++ b/CSC2636/search/.gitignore
@@ -0,0 +1,6 @@
+*test*
+pages
+index.dat
+indexer
+search
+*.swp
diff --git a/CSC2636/search/README.rst b/CSC2636/search/README.rst
new file mode 100644
index 0000000..e1d14fb
--- /dev/null
+++ b/CSC2636/search/README.rst
@@ -0,0 +1,19 @@
+=============
+Search Engine
+=============
+
+Setup
+=====
+In order for search.go to use the index package the directory "index"
+must by copied (or linked) into a directory "search" that is in your
+GOPATH.
+
+About
+=====
+Search Engine for web science class.
+
+See assign.rst for assignment details.
+
+Authors
+=======
+- Tucker Evans
diff --git a/CSC2636/search/assign.rst b/CSC2636/search/assign.rst
new file mode 100644
index 0000000..66e537e
--- /dev/null
+++ b/CSC2636/search/assign.rst
@@ -0,0 +1,213 @@
+========================
+Project 2: Search Engine
+========================
+
+**CS2621– Web Science**
+
+*100 points*
+
+You are to create a web search engine that works at the command line.
+To do this, you will write two Python scripts, indexer.py and
+search.py. 
+
+Indexer
+=======
+
+Indexer.py should do the following:
+
+1. After performing a crawl (using your other Python script), read all
+   the HTML files that were stored in the “pages” directory. For each
+   document, extract the title and the text from the body of the page
+   (read the Beautiful Soup documentation to find out how). Beautiful
+   Soup will include the text of the page in the content of the page,
+   and that is OK. Beautiful Soup may also break on some pages and
+   include HTML as text, but we will not worry about these 
+   exceptions or bugs.
+
+2. All text should be converted to lowercase and non-alphanumeric
+   characters should be ignored. So “123-456” would become “123” and
+   “456”, and “joe@yahoo.com” would become “joe”, “yahoo”, “com”.
+   Ignore the following stop words: a, an, and, are, as, at, be, by,
+   for, from, has, he, in, is, it, its, of, on, that, the, to, was,
+   were, will, with. Do not perform stemming.
+
+3. A single inverted index should be created for the document corpus
+   which maintains the document ID (numbered 1…n in order of the pages
+   found in the “pages” directory), a 1 or 0 if the text is found in
+   the title, and the term frequency from the body (normalized by the
+   total number of tokens in the document after removing stop words).
+
+4. After indexer.py has finished indexing all the web pages, it should
+   output the index to index.dat which looks likethis:
+
+::
+
+  arkansas
+    6 0 0.022
+  model
+    1 0 0.309
+    3 0 0.015
+    5 1 0.001
+  tuesday
+    2 0 0.082
+  white
+    2 1 0.018
+  etc…
+
+.. note ::
+  The indexed words are alphabetized, and there are 3 spaces before
+  sets of three numbers (each separated by a single space) which are:
+  doc ID, title (0 or 1), and normalized body TF (rounded to 3 decimal
+  places). For example, the term white was found only in document 2;
+  it was somewhere in the title and made up 1.8% of all the words in
+  the document.
+
+5. It may take some time for your program to run, so you should output
+   information about the program’s status as it indexes the crawled
+   pages. Outputting what file is being worked on would be helpful to
+   the user who is waiting for the program to finish its work.
+
+Search
+======
+
+After the index is written to index.dat, the search.py script will
+allow the user to search the corpus for specific words.  Here is how
+it should operate:
+
+1. First, read the search phrase at the command line. Examples:
+
+  .. code :: bash
+
+    $ search.py bisons
+    $ search.py "landmark college"
+
+If no command line argument is supplied, the program should tell the
+user a search term is required and terminate.  Ignore any command-line
+arguments after the first.
+
+2. Next, the program should read the index from index.dat into memory.
+   Note that you may want to use similar data structures used in
+   indexer.py, so you should write your programs in a way where you
+   share code without having redundant code in each script. (It’s OK
+   to introduce new .py files to your project.)
+
+3. For simplicity, all queries will be assumed to use boolean ANDs,
+   and we will not implement phrase search. For example, the query
+   landmark college should generate a boolean search for landmark AND
+   college, so only documents containing both terms should be
+   considered amatch.
+
+4. Remove any stop words from the query as was done when indexing the
+   documents.
+
+5. After determining which documents match the search terms, calculate
+   the relevancy score for each document: relevancy score = 0.9 * body
+   TF + 0.1 * title score Do this for each term, and compute the
+   average relevancy score for all terms. So if the search was for
+   landmark college, you would compute the score for landmark and the
+   score for college and compute the average to determine the overall
+   relevancy score.
+
+6. The total number of results should first be displayed. Then display
+   every document ID and score (out to 3 decimal places) ordered by
+   score, and number the results. Example:
+
+.. code:: bash
+
+  Results: 4
+  1. docID, 3, score, 0.830
+  2. docID, 1, score, 0.814
+  3. docID, 5, score, 0.350
+  4. docID, 8, score, 0.108
+
+**Bonus:** You can receive 5 bonus points by implementing phrase search.
+So when the user searches for “landmark college”, assume they want
+only documents with that exact phrase. To accomplish this, you will
+need to store the positions of the terms that are stored in the
+inverted index. Then use those positions to ensure the phrase matches
+successive positions.  
+
+
+Zip your entire project directory and submit it
+to Canvas before it is due. Make sure your output matches the
+specifications precisely to avoid losing any points. If you use any
+code you find in the Web, you must document the source in your
+program.
+
+Test Data
+=========
+
+*a.html*
+
+.. code:: html
+
+  <title>cool!!! test!!!</title>
+  	<body>
+  		this 123-456.
+  	</body>
+
+*b.html*
+
+.. code:: html
+
+  <html>
+  	<head>
+  		<title>Go Patriots!</title>
+  	</head>
+  	<body>
+  		And another test and test!
+  	</body>
+  </html>
+
+*c.html*
+
+.. code:: html
+
+  <body>
+  	This is a test.
+  </body>
+
+*Inverted index:*
+
+.. code::
+
+  123
+  	a 0 0.200
+  456
+  	a 0 0.200
+  another
+  	b 0 0.200
+  cool
+  	a 1 0.200
+  patriots
+  	b 1 0.200
+  go
+  	b 1 0.200
+  test
+  	a 1 0.200
+  	c 0 0.500
+  	b 0 0.400
+  this
+  	a 0 0.200
+  	c 0 0.500
+
+Search for "test this" results in the following:
+
+::
+
+  Results: 2
+  1. docID c, score 0.450
+  2. docID a, score 0.230
+
+Search for "test patriots go" results in the following:
+
+::
+
+  Results: 1
+  1. docID b, score 0.310
+
+Search for "cool patriots" results in the following:
+
+::
+
+  Results: 0
diff --git a/CSC2636/search/index/index.go b/CSC2636/search/index/index.go
new file mode 100644
index 0000000..5d8ab65
--- /dev/null
+++ b/CSC2636/search/index/index.go
@@ -0,0 +1,165 @@
+package index
+
+import "fmt"
+import "os"
+import "io"
+import "bufio"
+import "sort"
+import "errors"
+import "strings"
+import "strconv"
+
+/* TODO
+
+   - Implement Forward Creation
+   - Implement Inverted from Forward
+   - Switch Indexer.go over to this package
+
+/*********
+ * Types *
+ *********/
+
+type F_info struct {
+	Word string;
+	In_title bool;
+	Freq float64;
+};
+
+type I_info struct {
+	Doc string;
+	In_title bool;
+	Freq float64;
+};
+
+type F_entry struct{
+	This *F_info;
+	Next *F_entry;
+};
+
+type I_entry struct{
+	This *I_info;
+	Next *I_entry;
+};
+
+type F_index map[string]*F_entry;
+type I_index map[string]*I_entry;
+
+type sortInverted struct{
+	w string;
+	root *I_entry;
+};
+
+
+/***************************
+ * Forward Index Funcitons *
+ ***************************/
+
+func NewForwardEntryStrings(text, title []string) (*F_entry, error) {
+	return nil, errors.New("not implemented");
+}
+
+/****************************
+ * Inverted Index Functions *
+ ****************************/
+
+func new_I_info() *I_info{
+	return &I_info{"", false, 0.0};
+}
+
+func NewInvertedIndexFromFile(fname string) (I_index, error) {
+	var fd *os.File;
+	var br *bufio.Reader;
+	var err error;
+	var buf []byte;
+	var tmp *I_info;
+	var cur *I_entry;
+	var index I_index;
+	var word string
+	var info []string;
+
+	fd, err = os.Open(fname);
+	if err != nil {
+		return nil, err;
+	}
+
+	br = bufio.NewReader(fd);
+	if br == nil {
+		return nil, errors.New("Could not initialize reader");
+	}
+
+	index = make(I_index);
+
+	for buf, err = br.ReadBytes('\n'); err != io.EOF;  buf, err = br.ReadBytes('\n'){
+		tmp = new_I_info();
+		if err != nil {
+			return nil, err;
+		}
+		if buf[0] != '\t' {
+			word = strings.TrimSpace(string(buf));
+		} else {
+			info = strings.Fields(string(buf));
+			tmp.Doc = info[0];
+			tmp.In_title = (info[1] == "1");
+			tmp.Freq, _ = strconv.ParseFloat(info[2], 32);
+			if (index[word] == nil) {
+				index[word] = &I_entry{This: tmp, Next: nil};
+			} else {
+				cur = index[word];
+				for cur.Next != nil {
+					cur = cur.Next;
+				}
+				cur.Next = &I_entry{This: tmp, Next: nil};
+			}
+		}
+	}
+
+	return index, nil;
+}
+
+func NewInvertedFromForward(f F_index) (I_index, error) {
+ return nil, errors.New("not implemented");
+
+}
+
+func (x I_index) PrintToFile(fd *os.File) error{
+	var i int;
+	var cur *I_entry;
+	var index []sortInverted;
+
+	index = x.sortIndex();
+	
+	for i = 0; i < len(index); i++ {
+		fmt.Fprintf(fd, "%s\n", index[i].w);
+		for cur = index[i].root; cur != nil; cur = cur.Next {
+			fmt.Fprintf(fd, "\t%s %d %.3f\n", cur.This.Doc, toInt(cur.This.In_title), cur.This.Freq);
+		}
+	}
+	return nil;
+}
+
+func toInt(t bool) int{
+	if (t){
+		return 1;
+	}
+	return 0;
+}
+
+func (unsort I_index) sortIndex() []sortInverted {
+	var i int;
+	var sorted []sortInverted;
+
+	sorted = make([]sortInverted, len(unsort));
+
+	i = 0;
+	for k, v := range unsort {
+		sorted[i].w = k;
+		sorted[i].root = v;
+		i++;
+	}
+
+	sort.Slice(sorted, func(i, j int) bool {
+		return sorted[i].w < sorted[j].w;
+	});
+	
+	return sorted
+}
diff --git a/CSC2636/search/indexer.go b/CSC2636/search/indexer.go
new file mode 100644
index 0000000..d95f126
--- /dev/null
+++ b/CSC2636/search/indexer.go
@@ -0,0 +1,402 @@
+package main
+
+import "os"
+import "sort"
+import "golang.org/x/net/html"
+import "log"
+import "fmt"
+import "github.com/PuerkitoBio/goquery"
+import "github.com/kennygrant/sanitize"
+import "strings"
+import "flag"
+import "errors"
+import "regexp"
+
+type document struct {
+	fname string;
+	title []string;
+	text []string;
+	length int;
+}
+
+type index struct {
+	doc *document;
+	title bool;
+	freq int;
+}
+
+type wordSort struct {
+	w string;
+	root *wordList;
+}
+
+type wordList struct {
+	this *index
+	next *wordList
+}
+
+var r, nonAN *regexp.Regexp;
+var stopWords []*regexp.Regexp;
+
+
+func newDocument() *document {
+	return &document{"" , nil, nil, 0};
+}
+
+func RemoveNode(r, rn *html.Node) {
+	var found bool;
+	var n, item *html.Node;
+	var nodes map[int]*html.Node;
+	var i, j int;
+
+	found = false;
+	nodes = make(map[int]*html.Node);
+
+	for n = r.FirstChild; n != nil; n = n.NextSibling {
+		if n == rn {
+			found = true;
+			n.Parent.RemoveChild(n);
+		}
+
+		nodes[i] = n;
+		i++;
+	}
+
+	if !found {
+		for j = 0; j < i; j++ {
+			item = nodes[j];
+			RemoveNode(item, rn);
+		}
+	}
+}
+func RemoveTag(doc *goquery.Selection, tag string) {
+	doc.Find(tag).Each(func(i int, s *goquery.Selection) {
+		RemoveNode(doc.Get(0), s.Get(0));
+	});
+}
+
+func logReg(h []byte) []byte {
+	log.Printf("RegExp: %s", h);
+	return h;
+}
+
+func parseDoc(fd *os.File, f_info os.FileInfo) (*document, error) {
+	var err error;
+	var text, t_text string;
+	var doc *goquery.Document;
+	var body, title *goquery.Selection;
+	var r_doc *document;
+	var i int;
+
+	doc, err = goquery.NewDocumentFromReader(fd);
+	if err != nil {
+		log.Printf("goquery error: %s\n", err);
+		return nil, errors.New("Can't create goquery documnt");
+	}
+
+	body = doc.Find("body");
+	RemoveTag(body, "script");
+	RemoveTag(body, "noscript");
+
+	title = doc.Find("title");
+
+	//TODO add error detection
+	text, err = body.Html();
+	t_text, err = title.Html();
+
+
+	text = r.ReplaceAllString(text, "> <");
+	t_text = r.ReplaceAllString(t_text, "> <");
+
+	text = sanitize.HTML(text);
+	t_text = sanitize.HTML(t_text);
+
+	text = strings.ToLower(text);
+	t_text = strings.ToLower(t_text);
+
+	text = nonAN.ReplaceAllString(text, " ");
+	t_text = nonAN.ReplaceAllString(t_text, " ");
+
+
+	for i = 0; i < len(stopWords); i++ {
+		text = stopWords[i].ReplaceAllString(text, " ");
+		t_text = stopWords[i].ReplaceAllString(t_text, " ");
+	}
+	r_doc = newDocument();
+
+	r_doc.fname = f_info.Name();
+	r_doc.text = strings.Fields(text);
+	r_doc.title = strings.Fields(t_text);
+	r_doc.length = len(r_doc.text) + len(r_doc.title);
+
+	return r_doc, nil;
+}
+func boolToInt(t bool) int {
+	if t {
+		return 1;
+	}
+	return 0;
+}
+
+func printIndex(words []wordSort, fd *os.File) {
+	var i int;
+	var cur *wordList;
+	var fname string;
+	var t int;
+	var freq float64;
+
+	for i = 0; i < len(words); i++ {
+		fmt.Fprintf(fd, "%s\n", words[i].w);
+		for cur = words[i].root; cur != nil; cur = cur.next {
+			fname = cur.this.doc.fname;
+			t = boolToInt(cur.this.title);
+			freq = float64(cur.this.freq) / float64(cur.this.doc.length);
+			
+			fmt.Fprintf(fd,"\t%s %d %.3f\n", fname, t, freq);
+		}
+	}
+}
+
+func init() {
+	var err error;
+	log.SetOutput(os.Stderr);
+	r, err = regexp.Compile("><");
+	if err != nil {
+		panic(err);
+	}
+	nonAN, err = regexp.Compile("[^a-zA-Z0-9]+");
+	if err != nil {
+		panic(err);
+	}
+	//TODO add func to read in stop words from a file;
+	stopWords = make([]*regexp.Regexp, 26)
+	if err != nil {
+		panic(err);
+	}
+	stopWords[0], err = regexp.Compile("\\W+and\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[1], err = regexp.Compile("\\W+a\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[2], err = regexp.Compile("\\W+an\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[3], err = regexp.Compile("\\W+and\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[4], err = regexp.Compile("\\W+are\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[5], err = regexp.Compile("\\W+as\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[6], err = regexp.Compile("\\W+at\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[7], err = regexp.Compile("\\W+be\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[8], err = regexp.Compile("\\W+by\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[9], err = regexp.Compile("\\W+for\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[10], err = regexp.Compile("\\W+from\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[11], err = regexp.Compile("\\W+has\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[12], err = regexp.Compile("\\W+he\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[13], err = regexp.Compile("\\W+in\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[14], err = regexp.Compile("\\W+is\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[15], err = regexp.Compile("\\W+it\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[16], err = regexp.Compile("\\W+its\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[17], err = regexp.Compile("\\W+of\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[18], err = regexp.Compile("\\W+on\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[19], err = regexp.Compile("\\W+that\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[20], err = regexp.Compile("\\W+the\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[21], err = regexp.Compile("\\W+to\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[22], err = regexp.Compile("\\W+was\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[23], err = regexp.Compile("\\W+were\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[24], err = regexp.Compile("\\W+will\\W+");
+	if err != nil {
+		panic(err);
+	}
+	stopWords[25], err = regexp.Compile("\\W+with\\W+");
+	if err != nil {
+		panic(err);
+	}
+}
+
+func main() {
+	//	var words map[string]index
+	var p_dir, w, fname string;
+	var err error;
+	var i, j int;
+	var words map[string]*wordList;
+	var cur *wordList;
+	var tmp *index;
+	var sorted []wordSort;
+
+	var files []os.FileInfo;
+	var dir, fd *os.File;
+	var dir_info, fd_info os.FileInfo;
+	var dir_mode os.FileMode;
+
+	var doc *document;
+
+	flag.StringVar(&p_dir, "d", "./pages", "pages directory");
+
+	flag.Parse();
+
+	words = make(map[string]*wordList);
+
+	dir, err = os.Open(p_dir);
+	if err != nil {
+		log.Printf("Error accessing \"%s\":\t%s\n", p_dir, err);
+		os.Exit(1);
+	}
+
+	dir_info, err = dir.Stat();
+	dir_mode = dir_info.Mode();
+
+	if !dir_mode.IsDir() {
+		log.Printf("\"%s\" is not a directory\n", p_dir);
+		os.Exit(1);
+	}
+
+	files, err = dir.Readdir(0);
+	if err != nil {
+		log.Printf("Error reading %s\n", p_dir);
+		os.Exit(1);
+	}
+
+	for i = 0; i < len(files); i++ {
+		fd, err = os.Open(fmt.Sprintf("%s/%s", dir_info.Name(), files[i].Name()));
+		fd_info, err = fd.Stat();
+		if err != nil {
+			log.Printf("Error getting info\n");
+			os.Exit(1);
+		}
+		fname = fd_info.Name();
+
+		if err != nil {
+			log.Printf("Error reading %s/%s\n", dir_info.Name(), files[i].Name());
+		} else {
+			fmt.Printf("Indexing %s...\n", fname);
+			doc, err = parseDoc(fd, fd_info);
+			if err != nil {
+				log.Printf("Error parsing %s/%s\n", dir_info.Name(), files[i].Name());
+			} else {
+				/* Text */
+				for j = 0; j < len(doc.text); j++ {
+					w = strings.ToLower(doc.text[j]);
+
+					if words[w] == nil{
+						tmp = &index{doc: doc, title: false, freq: 0};
+						words[w] = &wordList{this: tmp, next: nil};
+					}
+
+					for cur = words[w];cur.next != nil && cur.this.doc.fname != fname; cur = cur.next{}
+
+					if cur.this.doc.fname == fname {
+						cur.this.freq++
+					} else if cur.next == nil {
+						tmp = &index{doc: doc, title: false, freq: 1};
+						cur.next = &wordList{this: tmp, next: nil};
+					} else {
+						panic(fmt.Sprintf("%v", cur));
+					}
+				}
+				/* Title */
+				for j = 0; j < len(doc.title); j++ {
+					w = strings.ToLower(doc.title[j]);
+
+					if words[w] == nil{
+						tmp = &index{doc: doc, title: true, freq: 0};
+						words[w] = &wordList{this: tmp, next: nil};
+					}
+
+					for cur = words[w];cur.next != nil && cur.this.doc.fname != fname; cur = cur.next{}
+
+					if cur.this.doc.fname == fname {
+						cur.this.title = true;
+						cur.this.freq++;
+					} else if cur.next == nil {
+						tmp = &index{doc: doc, title: true, freq: 1};
+						cur.next = &wordList{this: tmp, next: nil};
+					} else {
+						panic(fmt.Sprintf("%v", cur));
+					}
+				}
+			}
+		}
+		fd.Close();
+	}
+		sorted = make([]wordSort, len(words));
+		i = 0;
+		for k,v := range words {
+			sorted[i].w = k;
+			sorted[i].root = v;
+			i++;
+		}
+		
+		sort.Slice(sorted, func(i, j int) bool {
+			return sorted[i].w < sorted[j].w;
+		});
+		
+		fd,_ = os.Create("index.dat");
+		printIndex(sorted, fd);
+		fd.Close();
+}
diff --git a/CSC2636/search/search.go b/CSC2636/search/search.go
new file mode 100644
index 0000000..c144055
--- /dev/null
+++ b/CSC2636/search/search.go
@@ -0,0 +1,144 @@
+/************************************************
+ *                 README                       *
+ * In order for search/index to be accessible   *
+ * you must link this folder (search) into your *
+ * GOPATH                                       *
+ ************************************************/
+
+
+package main
+
+import "search/index"
+import "os"
+import "fmt"
+import "sort"
+import "flag"
+import "strings"
+
+type res struct {
+	doc string;
+	score float64;
+};
+
+func main() {
+	var init_index, sIndex index.I_index;
+	var tmp, results, root *index.I_entry;
+	var tmp_score float64;
+	var scores map[string]map[string]float64; // scores[doc][word] == score
+	var i,j int;
+	var searchBool, perWord, docAdded map[string]bool; //map[doc]bool
+	var resultSort []res;
+	var err error;
+	var fname, s string;
+	var search []string;
+
+	flag.StringVar(&fname, "f", "./index.dat", "Index file");
+	flag.StringVar(&s, "s", "" , "Search phrase");
+
+	flag.Parse();
+	if len(s) == 0 {
+		fmt.Printf("Usage: search -s \"search phrase\" [-f index_file]");
+		os.Exit(1);
+	} else {
+		search = strings.Fields(s);
+	}
+
+	scores = make(map[string]map[string]float64);
+	searchBool = make(map[string]bool);
+	perWord = make(map[string]bool);
+	docAdded = make(map[string]bool);
+
+
+	sIndex = make(index.I_index);
+
+
+
+	init_index, err = index.NewInvertedIndexFromFile(fname);
+	if err != nil {
+		panic(err)
+	}
+	for i = 0; i < len(search); i++ {
+		sIndex[search[i]] = init_index[search[i]]
+	}
+
+	for _, v := range sIndex {
+		for tmp = v; tmp != nil; tmp = tmp.Next {
+			searchBool[tmp.This.Doc] = true;
+			scores[tmp.This.Doc] = make(map[string]float64);
+		}
+	}
+
+	for _, v := range sIndex {
+		for tmp = v; tmp != nil; tmp = tmp.Next {
+			perWord[tmp.This.Doc] = true;
+		}
+		for d := range searchBool {
+			if !perWord[d] {
+				searchBool[d] = false;
+			}
+		}
+		perWord = make(map[string]bool);
+	}
+
+	for k, v := range sIndex {
+		for tmp = v; tmp != nil; tmp = tmp.Next {
+			if searchBool[tmp.This.Doc] {
+				if tmp.This.In_title {
+					tmp_score = 1.0;
+				} else {
+					tmp_score = 0.0;
+				}
+
+				scores[tmp.This.Doc][k] = (0.9 * tmp.This.Freq) + (0.1 * tmp_score);
+			}
+		}
+
+	}
+
+	i = 0;
+	results = &index.I_entry{nil, nil}
+	root = &index.I_entry{nil, nil};
+	results.Next = root;
+
+	j = 0;
+
+	for _ ,v := range sIndex {
+		for tmp = v; tmp != nil; tmp = tmp.Next {
+			if (searchBool[tmp.This.Doc]) {
+				root.This = tmp.This;
+				docAdded[root.This.Doc] = false;
+				root.Next = &index.I_entry{nil, nil};
+				root = root.Next;
+			j++
+			}
+		}
+	}
+
+	resultSort = make([]res, j);
+
+	i = 0;
+	for root = results.Next; root.Next != nil; root = root.Next {
+		if (!docAdded[root.This.Doc]) {
+			j = 0;
+			tmp_score = 0;
+			for _ ,v := range scores[root.This.Doc] {
+				tmp_score += v;
+				j++;
+			}
+			tmp_score /= float64(j);
+			resultSort[i] = res{root.This.Doc, tmp_score};
+			docAdded[root.This.Doc] = true;
+			i++;
+		}
+	}
+	resultSort = resultSort[:i];
+
+		sort.Slice(resultSort, func(i, j int) bool {
+			return resultSort[i].score > resultSort[j].score;
+		});
+	
+	fmt.Printf("Results: %d\n", len(resultSort));
+	for i = 0; i < len(resultSort); i++ {
+		fmt.Printf("\t%d. Doc: %s, Score: %.3f\n", i, resultSort[i].doc, resultSort[i].score);
+	}
+}
-- 
cgit v1.1