aboutsummaryrefslogtreecommitdiff
path: root/search/index.go
blob: 7f202fbe64290a8caade83bfd7f182fe245d2918 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
package main


import "os"
import "log"
import "fmt"
import "github.com/PuerkitoBio/goquery"
import "strings"
import "flag"
import "errors"

type index struct {
	doc string;
	title bool;
	freq int;
}

type document struct{
	title []string;
	text []string;
}


func newDocument() (*document) {
	return &document{nil, nil};
}

func parseDoc(fd *os.File) (*document, error) {
	var err error;
	var text, t_text string;
	var doc *goquery.Document; 
	var body, title *goquery.Selection;
	var r_doc *document;
	
	doc, err = goquery.NewDocumentFromReader(fd);
	if (err != nil) {
		log.Printf("goquery error: %s\n", err);
		return nil, errors.New("Can't create goquery documnt");
	}
	
	//TODO test kennygrant/sanatize instead of goquery
	body = doc.Find("body").Not("style").Not("script");
	title = doc.Find("title");
	
	text = body.Text();
	t_text = title.Text();

	r_doc = newDocument();

	r_doc.text = strings.Fields(text);
	r_doc.title = strings.Fields(t_text);
	if (len(r_doc.text) == 1) {
		log.Printf("not splittin!!!!!!!!!!!\n");
		os.Exit(1);
	}

	return r_doc, nil;
}

func init() {
	log.SetOutput(os.Stderr);
}

func main() {
//	var words map[string]index
	var p_dir string//, fname string;
	var err error;
	var i int;

	var files []os.FileInfo;
	var dir, fd *os.File;
	var dir_info os.FileInfo;
	var dir_mode os.FileMode;

	var doc *document;

	flag.StringVar(&p_dir, "d", "./pages", "pages directory");

	flag.Parse();
	
	dir, err = os.Open(p_dir);
	if (err != nil) {
		log.Printf("Error accessing \"%s\":\t%s\n", p_dir, err);
		os.Exit(1);
	}

	dir_info, err = dir.Stat();
	dir_mode = dir_info.Mode();
	
	if (!dir_mode.IsDir()) {
		log.Printf("\"%s\" is not a directory\n", p_dir);
		os.Exit(1);
	}

	files, err = dir.Readdir(0);
	if (err != nil) {
		log.Printf("Error reading %s\n", p_dir);
		os.Exit(1);
	}

	for i=0; i < len(files) && i < 1; i++ {
		fd, err = os.Open(fmt.Sprintf("%s/%s", dir_info.Name(), files[i].Name()));
		if (err != nil) {
			log.Printf("Error reading %s/%s\n", dir_info.Name(), files[i].Name());
		} else {
			doc, err = parseDoc(fd);
			if (err != nil) {
				log.Printf("Error parsing %s/%s\n", dir_info.Name(), files[i].Name());
			} else {
				fmt.Println(doc.text);
				fmt.Println(doc.title);
			}
		}
	}

}