From ac839180a1a3c8477989d887734798da146d7b57 Mon Sep 17 00:00:00 2001
From: Tucker Evans
Date: Sat, 2 Dec 2017 16:38:30 -0500
Subject: CSC2621/assignments/webCrawler2: testing merge --theirs

---
 webCrawler2/crawler.go | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'webCrawler2')

diff --git a/webCrawler2/crawler.go b/webCrawler2/crawler.go
index 1c7bc70..5c4dba6 100644
--- a/webCrawler2/crawler.go
+++ b/webCrawler2/crawler.go
@@ -27,7 +27,7 @@ func validLink(s string) bool {
 	//return (strings.HasSuffix(s, ".html") || strings.HasSuffix(s, "/") || strings.HasSuffix(s, "\\"))
 }
 
-func addLinks(doc *goquery.Document, jobs chan link, current link, depth int) {
+func addLinks(doc *goquery.Document, jobs chan link, current link, depth int, worker_id int) {
 	doc.Find("body a").Each(func(index int, item *goquery.Selection) {
 		link_s, _ := item.Attr("href")
 
@@ -71,7 +71,6 @@ func worker(done chan bool, jobs chan link, depth int, id int, total uint64) {
 		case j := <-jobs:
 			if j.depth < depth {
 				doc, err := goquery.NewDocument(j.u.String())
-				docs <- doc
 				if err != nil {
 					log.Print("Error Reading Document: " + j.u.String() + err.Error())
 					break
@@ -83,6 +82,7 @@ func worker(done chan bool, jobs chan link, depth int, id int, total uint64) {
 				addLinks(doc, jobs, j, j.depth, id)
 			}
 		case <-time.After(time.Second * 10):
+			fmt.Printf("Worker %d done\n", id)
 			done <- true
 			return
 		}
@@ -129,8 +129,7 @@ func main() {
 	t = uint64(b)
 	w, _ = strconv.Atoi(os.Args[4])
 
-	links := make(chan link, 1024*1024)
-	docs := make(chan *goquery.Document, 100)
+	jobs := make(chan link, 1024*1024)
 	done := make(chan bool)
 
 	u, err := url.Parse(os.Args[1])
@@ -141,7 +140,7 @@ func main() {
 	if !u.IsAbs() {
 		panic("Cannot start with relative url")
 	}
-	links <- link{u, 0}
+	jobs <- link{u, 0}
 
 	//send first job
 
-- 
cgit v1.1