aboutsummaryrefslogtreecommitdiff
path: root/search
diff options
context:
space:
mode:
author Tucker Evans <tuckerevans24@gmail.com> 2017-12-09 16:17:41 -0500
committer Tucker Evans <tuckerevans24@gmail.com> 2017-12-17 13:20:31 -0500
commit 204113558a3d2764fbdc4f8643d63ad5e6c330e1 (patch)
tree e308bd67f33a273b2fe09e389fd0404fbf8dcb58 /search
parent a0eccf37c6538ac4ef64be22e4510a6255203525 (diff)
CSC2621/assignments/search: Fixed parseDoc func; Added .gitignore
Diffstat (limited to 'search')
-rw-r--r-- search/.gitignore 4
-rw-r--r-- search/index.go 18
2 files changed, 17 insertions, 5 deletions
diff --git a/search/.gitignore b/search/.gitignore
new file mode 100644
index 0000000..81a686a
--- /dev/null
+++ b/search/.gitignore
@@ -0,0 +1,4 @@
+*test*
+pages
+index
+search
diff --git a/search/index.go b/search/index.go
index 0d60b8a..9b1bc0c 100644
--- a/search/index.go
+++ b/search/index.go
@@ -9,6 +9,7 @@ import "github.com/kennygrant/sanitize"
import "strings"
import "flag"
import "errors"
+import "regexp"
type index struct {
doc string;
@@ -21,6 +22,8 @@ type document struct {
text []string;
}
+var r *regexp.Regexp;
+
func newDocument() *document {
return &document{nil, nil};
}
@@ -57,6 +60,11 @@ func RemoveTag(doc *goquery.Selection, tag string) {
});
}
+func logReg(h []byte) []byte {
+ log.Printf("RegExp: %s", h);
+ return h;
+}
+
func parseDoc(fd *os.File) (*document, error) {
var err error;
var text, t_text string;
@@ -70,7 +78,6 @@ func parseDoc(fd *os.File) (*document, error) {
return nil, errors.New("Can't create goquery documnt");
}
- //TODO test kennygrant/sanatize instead of goquery
body = doc.Find("body");
RemoveTag(body, "script");
RemoveTag(body, "noscript");
@@ -81,20 +88,21 @@ func parseDoc(fd *os.File) (*document, error) {
text, err = body.Html();
t_text, err = title.Html();
+
+ text = r.ReplaceAllString(text, "> <");
+ t_text = r.ReplaceAllString(text, "> <");
+
r_doc = newDocument();
r_doc.text = strings.Fields(sanitize.HTML(text));
r_doc.title = strings.Fields(sanitize.HTML(t_text));
- if len(r_doc.text) == 1 {
- log.Printf("not splittin!!!!!!!!!!!\n");
- os.Exit(1);
- }
return r_doc, nil;
}
func init() {
log.SetOutput(os.Stderr);
+ r, _ = regexp.Compile("><");
}
func main() {