From e8b1808eaf87a49e4c34ebbfb66854baa627418c Mon Sep 17 00:00:00 2001 From: Tucker Evans Date: Mon, 18 Feb 2019 07:35:54 -0500 Subject: Moves assignments to given course folder. --- CS2771/alarm/alarm.asm | 65 ++++++ CS2771/alarm/assign.rst | 25 ++ CS3871/filesystem/README.rst | 0 CS3871/filesystem/disk.h | 17 ++ CS3871/filesystem/disk.o | Bin 0 -> 3352 bytes CS3871/filesystem/tfs.c | 352 +++++++++++++++++++++++++++++ CS3871/jobScheduler/cpuScheduleTable.c | 224 ++++++++++++++++++ CS3871/sync/assign.rst | 41 ++++ CS3871/sync/makefile | 5 + CS3871/sync/reader.c | 86 +++++++ CS3871/sync/sync.c | 118 ++++++++++ CS3871/sync/writer.c | 76 +++++++ CS3871/timing/client.c | 133 +++++++++++ CSC2636/search/.gitignore | 6 + CSC2636/search/README.rst | 19 ++ CSC2636/search/assign.rst | 213 +++++++++++++++++ CSC2636/search/index/index.go | 165 ++++++++++++++ CSC2636/search/indexer.go | 402 +++++++++++++++++++++++++++++++++ CSC2636/search/search.go | 144 ++++++++++++ CSC2636/webCrawler2/README.rst | 13 ++ CSC2636/webCrawler2/crawler.go | 164 ++++++++++++++ alarm/alarm.asm | 65 ------ alarm/assign.rst | 25 -- filesystem/README.rst | 0 filesystem/disk.h | 17 -- filesystem/disk.o | Bin 3352 -> 0 bytes filesystem/tfs.c | 352 ----------------------------- jobScheduler/cpuScheduleTable.c | 224 ------------------ search/.gitignore | 6 - search/README.rst | 19 -- search/assign.rst | 213 ----------------- search/index/index.go | 165 -------------- search/indexer.go | 402 --------------------------------- search/search.go | 144 ------------ sync/assign.rst | 41 ---- sync/makefile | 5 - sync/reader.c | 86 ------- sync/sync.c | 118 ---------- sync/writer.c | 76 ------- timing/client.c | 133 ----------- webCrawler2/README.rst | 13 -- webCrawler2/crawler.go | 164 -------------- 42 files changed, 2268 insertions(+), 2268 deletions(-) create mode 100644 CS2771/alarm/alarm.asm create mode 100644 CS2771/alarm/assign.rst create mode 100644 CS3871/filesystem/README.rst create mode 100644 CS3871/filesystem/disk.h create mode 100644 CS3871/filesystem/disk.o create mode 100644 CS3871/filesystem/tfs.c create mode 100644 CS3871/jobScheduler/cpuScheduleTable.c create mode 100644 CS3871/sync/assign.rst create mode 100644 CS3871/sync/makefile create mode 100644 CS3871/sync/reader.c create mode 100644 CS3871/sync/sync.c create mode 100644 CS3871/sync/writer.c create mode 100644 CS3871/timing/client.c create mode 100644 CSC2636/search/.gitignore create mode 100644 CSC2636/search/README.rst create mode 100644 CSC2636/search/assign.rst create mode 100644 CSC2636/search/index/index.go create mode 100644 CSC2636/search/indexer.go create mode 100644 CSC2636/search/search.go create mode 100644 CSC2636/webCrawler2/README.rst create mode 100644 CSC2636/webCrawler2/crawler.go delete mode 100644 alarm/alarm.asm delete mode 100644 alarm/assign.rst delete mode 100644 filesystem/README.rst delete mode 100644 filesystem/disk.h delete mode 100644 filesystem/disk.o delete mode 100644 filesystem/tfs.c delete mode 100644 jobScheduler/cpuScheduleTable.c delete mode 100644 search/.gitignore delete mode 100644 search/README.rst delete mode 100644 search/assign.rst delete mode 100644 search/index/index.go delete mode 100644 search/indexer.go delete mode 100644 search/search.go delete mode 100644 sync/assign.rst delete mode 100644 sync/makefile delete mode 100644 sync/reader.c delete mode 100644 sync/sync.c delete mode 100644 sync/writer.c delete mode 100644 timing/client.c delete mode 100644 webCrawler2/README.rst delete mode 100644 
webCrawler2/crawler.go diff --git a/CS2771/alarm/alarm.asm b/CS2771/alarm/alarm.asm new file mode 100644 index 0000000..dd094ed --- /dev/null +++ b/CS2771/alarm/alarm.asm @@ -0,0 +1,65 @@ +; +;30h-39h hold the 7seg bit patterns + mov 30h, #11000000B + mov 31h, #11111001B + mov 32h, #10100100B + mov 33h, #10110000B + mov 34h, #10011001B + mov 35h, #10010010B + mov 36h, #10000010B + mov 37h, #10000111B + mov 38h, #10000000B + mov 39h, #10011000B + + mov +start: + + + +jmp start +;40h-42h hold the 3 digits to display + +display: + mov acc, 50h + mov b, #1000 + div ab + mov 40h, acc + mov acc, b + mov b, #100 + div ab + mov 41h, acc; save 100's digit + mov acc, b ;put remainder in a + mov b, #10 ;find 10's & 1's digit + div ab + mov 42h, acc ;save 10's digit + mov 43h, b ;save 1's digit + + + +;display: + mov r1, #40h ;digits[0] + +loop: ;for each digit + mov acc, #30h + mov r3, acc ;save acc;next blob: + +;safe if you don't care about the +;bits of P3 other than p3.3 & p3.4 + mov a, r1 ;picks which + cpl a ;7seg to use p3.3 & + anl a, #03h ;p3.4 are a func + rl a ;of the low2 bits of + rl a ;the addr where the + rl a ;digits live + mov p1, #0ffh;undraw previous + mov p3, a ;set new 7seg + mov a, r3 ;restore acc + +; p1 = pattern[digit[i]] + add a, @r1 + mov r0, acc + mov p1, @r0 + inc r1 +; mov p1, #0ffh + cjne r1, #44h, loop + ret diff --git a/CS2771/alarm/assign.rst b/CS2771/alarm/assign.rst new file mode 100644 index 0000000..f9c802a --- /dev/null +++ b/CS2771/alarm/assign.rst @@ -0,0 +1,25 @@ +=========== +Alarm clock +=========== + +Write an 8051 program which implements an alarm clock. This project will +have two phases, the first of which is described here. The alarm clock +runs all the time, with the current time displayed on the four 7-segment +displays. In this first phase your clock needs to: + 1. use timers to keep time updated to the second + 2. switch between 12 hour and 24 hour modes based switch bank.0 (0==12 + hr, 1==24hr) + 3. if you are in 12 hour mode, use the decimal point on the last + 7-segment display to indicate AM or PM (off = AM, on == PM) + 4. switch between display of hours:minutes and minutes:seconds based on + switch bank.1 (0==hours:minutes, 1==minutes:seconds) + 5. keep the decimal point of the second 7-segment display lit as a + visual separator + +AM/PM should remain indicated regardless of which display mode you are +using. 
+ +You do not (yet) need to be able to: + - set the time (pick a starting value) + - set an alarm time + - perform an alarm diff --git a/CS3871/filesystem/README.rst b/CS3871/filesystem/README.rst new file mode 100644 index 0000000..e69de29 diff --git a/CS3871/filesystem/disk.h b/CS3871/filesystem/disk.h new file mode 100644 index 0000000..7bf6ad0 --- /dev/null +++ b/CS3871/filesystem/disk.h @@ -0,0 +1,17 @@ +#include +#include +#include + +#define TRACKS 128 +#define SECTORS 4096 + +/* 10ms per track change */ +/* 10ms/SECTORS per unit sector distance > 1 */ + +void dinit(); + +void rsector(int t,int s,unsigned char *b); + +void wsector(int t,int s,unsigned char *b); + + diff --git a/CS3871/filesystem/disk.o b/CS3871/filesystem/disk.o new file mode 100644 index 0000000..b1a36f8 Binary files /dev/null and b/CS3871/filesystem/disk.o differ diff --git a/CS3871/filesystem/tfs.c b/CS3871/filesystem/tfs.c new file mode 100644 index 0000000..fe2d94c --- /dev/null +++ b/CS3871/filesystem/tfs.c @@ -0,0 +1,352 @@ +#include +#include +#include +#include +#include "disk.h" + +#define MAX_INODES 1000 +#define INODE_START (TRACKS * SECTORS) / (8 * 512) +#define MAX_FILES 50 + +struct block_ptr { + char track; + short sector; +}; +struct blockll{ + struct block_ptr data; + struct blockll *next; +}; + +struct meta_data{ + char name[8]; + int size; + int read; + int write; + int create; +}; + +struct inode { + struct meta_data info; + struct block_ptr data[20]; +}; + + +struct inode_list{ + struct inode *node; + struct inode_list *next; +}; + +struct file { + struct inode *node; + int mode; + int next_sec; + int free; +}; + + +int inode_list_size = 0; +struct inode_list *root, *end; + +char bitmap[TRACKS][SECTORS/8]; + +struct file files[MAX_FILES]; +int size; + + + +int check_bitmap(t,s) +int t,s; +{ + char tmp; + tmp = bitmap[t][s/8]; + tmp &= (1 << (s % 8)); + return (int) tmp; +} + +void set_bitmap(t,s) +int t,s; +{ + bitmap[t][s/8] |= (1 << (s % 8)); + return; +} + +void print_bitmap() +{ + int i,j; + for(i = 0; i < 128; i++){ + + printf("\n%4d ", i); + for (j = 0; j < 4096/8; j++) { + printf("%02x", bitmap[i][j]); + if (j %31 == 0) { + printf("\n%4d ",i); + } + } + } +} + + +/* TODO + * Implement inode table as binary tree to speedup searches + */ +struct inode* inode_search(name) +char *name; +{ + if (strcmp(name,"") == 0) { + return -1; + } + int i; + struct inode_list *tmp = root; + + for(i = 0; i < MAX_INODES && i < inode_list_size; i++){ + tmp = tmp->next; + if(strcmp(name, tmp->node->info.name) == 0) + return tmp->node; + } + return -2; +} + +struct blockll* get_blocks(size) +int size; +{ + int i, t, s; + struct blockll *root, *current = malloc(sizeof(struct blockll)); + root = current; + + for (i = 0; size > 0 && i < (4096 * 128); i++) { + t = i / 4096; + s = i % 4096; + + if (!check_bitmap(t, s)) { + current->next = malloc(sizeof(struct blockll)); + current = current->next; + current-> next = NULL; + current->data.track = (char) t; + current->data.sector = (short) s; + + set_bitmap(t,s); + size-= 512; + } + } + + return i <(4096 * 128) ? 
root : NULL; +} + +struct inode_list* inode_create(name) +char *name; +{ + struct timeval *tmp_time = malloc(sizeof(struct timeval)); + + struct inode_list *tmp = malloc(sizeof(struct inode_list)); + struct inode *tmp_node = malloc(sizeof(struct inode)); + + + tmp->node = tmp_node; + + memcpy(&(tmp->node->info.name), name, strlen(name)); + + gettimeofday(tmp_time, NULL); + + tmp->node->info.create = tmp_time->tv_sec; + tmp->node->info.read = tmp_time->tv_sec; + tmp->node->info.write = tmp_time->tv_sec; + + end->next = tmp; + end = tmp; + inode_list_size++; + + return tmp; +} + +int inode_init() +{ + int n = MAX_INODES / 4; + int i; + char *ptr; + struct inode_list *tmp; + + if (MAX_INODES % 4 > 0) + n++; + + char *buf = malloc(512 * n); + + for (i =0; i < n; i++) { + rsector(0, i, buf + (512 * i)); + } + ptr = buf; + + tmp = root; + + for(i=0; i< MAX_INODES; i++) { + tmp->next = malloc(sizeof(struct inode_list)); + memcpy(&tmp->node, ptr, 64); + ptr += 64; + tmp = tmp->next; + inode_list_size++; + } +} + +/*save inodes to first n sectors on disk*/ +void inode_save() +{ + int i, j; + char *buf = malloc(512); + struct inode_list *tmp = root; + + for (i = 0; i < MAX_INODES && tmp->next;i++) { + for (j = 0; j < 4; j++){ + tmp = tmp->next; + memcpy(buf + j, tmp->node, sizeof(struct inode)); + } + wsector(0, INODE_START + i, buf); + } +} + +struct inode* inode_from_fd(fd) +int fd; +{ + int i; + struct inode_list *tmp = root; + + for (i = 0; i < fd; i++) { + tmp = tmp->next; + } + + return tmp->node; +} + +int find_fd() +{ + int i; + for (i = 0; i < size; i++) { + if (files[i].free) + return i; + } +} + +int tfs_init() +{ + int i; + root = malloc(sizeof(struct inode_list)); + end = root; + + dinit(); + /* + * + * has issue if inodes have not been written to disk i.e. 
first run + inode_init(); + */ + for (i = 0; i < MAX_FILES; i++) { + files[i].free = 1; + } +} + + +int open(fname, mode) +char *fname, *mode; +{ + struct inode *fnode = inode_search(fname); + int fd; + + if (fnode == -1) + return -1; + + if (fnode == -2){ + fnode = inode_create(fname)->node; + } + fd = find_fd(); + + files[fd].node = fnode; + files[fd].mode = *mode; + files[fd].next_sec = 0; + files[fd].free = 0; + size++; + + return fd; +} + +int close(fd) +int fd; +{ + if (files[fd].free) + return -1; + + files[fd].free = 1; + return 1; +} + +int read(fd, buf) +int fd; +char *buf; +{ + if (files[fd].free || files[fd].mode || (files[fd].next_sec == 20)) + return -1; + + + rsector(files[fd].node->data[files[fd].next_sec].track, files[fd].node->data[files[fd].next_sec].sector, buf); + files[fd].next_sec++; + return 512; +} + +int write(fd, buf) +int fd; +char *buf; +{ + if ((files[fd].next_sec == 20) || !files[fd].mode) + return 0; + + struct blockll *tmp = get_blocks(500); + files[fd].node->data[files[fd].next_sec].track = tmp->data.track; + files[fd].node->data[files[fd].next_sec].sector = tmp->data.sector; + + wsector(files[fd].node->data[files[fd].next_sec].track, files[fd].node->data[files[fd].next_sec].sector, buf); + return 1; +} + +int ulink(fname) +char *fname; +{ + struct inode_list *tmp = root; + struct inode *d; + int i; + + for(i = 0; i < MAX_INODES && i < inode_list_size; i++){ + tmp = tmp->next; + if(strcmp(fname, tmp->next->node->info.name) == 0) + break;; + } + + d = tmp->next->node; + tmp->next = tmp->next->next; + free(d); + return 1; + } + +int main() +{ + tfs_init(); + + /* + *Test Writing + */ + int mode = 1; + int fd = open("test", &mode); + + char buf[512]; + memcpy(&buf, "Hello Filesystem", strlen("Hello Filesystem")); + + int test = write(fd, &buf); + close(fd); + + /* + *Test reading + */ + mode = 0; + fd = open("test", &mode); + char buf2[512]; + read(fd, &buf2); + printf("wrote: %s\n", buf); + + printf("read: %s\n", buf2); + +} diff --git a/CS3871/jobScheduler/cpuScheduleTable.c b/CS3871/jobScheduler/cpuScheduleTable.c new file mode 100644 index 0000000..5d2256a --- /dev/null +++ b/CS3871/jobScheduler/cpuScheduleTable.c @@ -0,0 +1,224 @@ +#include +#include +#include +#include +#include +#include +#include + +#define PROCESS_COUNT 4 +#define time_calc(x, y) ((y.tv_sec - x.tv_sec) * 1000000) + (y.tv_usec - x.tv_usec) + + +/* TODO +add heap for priority +compute priority +*/ + +int go=1; +int turn=0; +float v; +float throughput; +struct timeval start_time; +int turns; +int completed_jobs; + +struct process { + int virgin; + int (*proc)(int); + int turnaround[9]; + struct timeval start_t; + int runs; + float save; + int turn; +}; + +struct process pt[PROCESS_COUNT]; /*process table*/ + +void +cpusched(signum) +int signum; +{ + pt[turn].virgin=0; + pt[turn].save=v; + go=0; +} + +int +f0(x) +int x; +{ + register int i=0; + register int t; + signal(SIGUSR1, cpusched); + if (!x) goto start; + v=0.0; + +start: + while (go && (v > -200.0)) { + printf("0");fflush(stdout); + t = (rand() % 1024); + v -= 2.0; + if ((v>0.0) || ((((int) v) % 2) == -1)) { + printf("f0, found odd or positive, v= %f\n", v); + exit(1); + } + usleep(t*100); + } + if (v <= -200.0) pt[0].virgin=1; + go=1; +} + +int +f1(x) +int x; +{ + register int i=0; + register int t; + if (!x) goto start2; + v= -1.0; + +start2: + while (go && (v > -401.0)) { + printf("1");fflush(stdout); + t = (rand() % 2048); + v -= 2.0; + if ((v>0.0) || ((((int) v) % 2) != -1)) { + printf("f1, found even or positive\n"); + 
exit(1); + } + usleep(t*100); + } + if (v <= -401.0) pt[1].virgin=1; + go=1; +} + +int +f2(x) +int x; +{ + register int i=0; + register int t; + if (!x) goto start3; + v= 1.0; + +start3: + while (go) { + printf("2");fflush(stdout); + t = (rand() % 4096); + v += 2.0; + if ((v<0.0) || ((((int) v) % 2) != 1)) { + printf("f2, found even or negative\n"); + exit(1); + } + usleep(t*100); + } + go=1; +} + +int +f3(x) +int x; +{ + register int i=0; + register int t; + if (!x) goto start4; + v= 0.0; + +start4: + while (go) { + printf("3");fflush(stdout); + t = (rand() % 4096); + v += 2.0; + if ((v<0.0) || ((((int) v) % 2) == 1)) { + printf("f3, found odd or negative\n"); + exit(1); + } + usleep(t*100); + } + go=1; +} + +void turnaround_calc(p, n) +struct process *p; +int n; +{ + int i; + p->turnaround[8] = 0; + for (i = 0; i < n; i++) { + p->turnaround[8] += p->turnaround[i] / n; + } + return; +} + +int main(argc, argv, envp) +int argc; +char **argv, **envp; +{ + int pid, i, last; + struct timeval end_t; + gettimeofday(&start_time, NULL); + + for (i = 0; i < PROCESS_COUNT; i++) { + pt[i].virgin = 1; + } + + pt[0].proc=f0; + pt[1].proc=f1; + pt[2].proc=f2; + pt[3].proc=f3; + + signal(SIGUSR1, cpusched); + if (pid=fork()) { + while (1) { + go = 1; + sleep(5); + if (go) + kill(pid, SIGUSR1); + } + } else { + while (1) { + printf("turn= %d\n", ++turns); + v=pt[turn].save; + if (pt[turn].virgin) + gettimeofday(&pt[turn].start_t, NULL); + + pt[turn].proc(pt[turn].virgin); + + gettimeofday(&end_t, NULL); + + if (pt[turn].virgin) { + pt[turn].turnaround[pt[turn].runs % 8] = time_calc(pt[turn].start_t, end_t); + turnaround_calc(&pt[turn], (pt[turn].runs < 8 ? pt[turn].runs + 1 : 8)); + pt[turn].runs++; + completed_jobs++; + kill(getppid(), SIGUSR1); + } else if (pt[turn].runs == 0){ + pt[turn].turnaround[8] = (time_calc(pt[turn].start_t, end_t)) / 2; + } + + throughput = completed_jobs / (float)(end_t.tv_sec - start_time.tv_sec); + pt[turn].turn++; + + if (turns == 1000) { + kill(getppid(), 9); + exit(1); + } + + printf("\n"); + + for (i = 0; i < 4; i++){ + printf("\t[%d]\tturnaround= %9d\truns= %3d\tturn= %d\n", i, pt[i].turnaround[8], pt[i].runs, pt[i].turn); + } + + printf("\ntime= %5d\tthroughput= %9f\tcompleted_jobs= %d\n\n\n", end_t.tv_sec - start_time.tv_sec, throughput, completed_jobs); + + last = turn; + turn = 0; + for (i = 1; i < PROCESS_COUNT; i++) { + turn = ((pt[turn].turnaround[8] < pt[i].turnaround[8]) && (turn != last)) ? turn : i; + } + + } + } +} diff --git a/CS3871/sync/assign.rst b/CS3871/sync/assign.rst new file mode 100644 index 0000000..ea4a566 --- /dev/null +++ b/CS3871/sync/assign.rst @@ -0,0 +1,41 @@ +================ +Syncronization 1 +================ + +Write a program that uses semaphores to implement a readers/writers solution. Your program should: + + - be written in C and use the standard kernel IPC mechanisms (semget,semop,shmget etc) + - be written as a single top level source file compiled as: gcc –o myprog myprog.c + - take two arguments on the command line: myprog NR NW where NR,NW are each integers specifying the number of reader/writer processes respectively + - use fork/exec to create the readers/writers + +The shared memory segment should be 16k bytes in size, with all bytes initialized to 0x30 + +Reader: + +.. 
code :: + + open a file (for append) named reader.N, where N is the reader number + while (1) { + for (i=0; i<16k; i++) { + read the next byte of the shared memory segment + write that byte to the file + flush the file + } + sleep a random number of seconds, between 0 and N inclusive + } + +Writer: + +.. code :: + + while (1) { + for (i=0; i<16k; i++) + shared memory segment[i] = N + 0x30; + sleep a random number of seconds between 0 and 2*N inclusive + } + +readers and writers should be mutually exclusive +multiple concurrent readers are allowed +writers have priority over readers +writers are mutually exclusive relative to each other diff --git a/CS3871/sync/makefile b/CS3871/sync/makefile new file mode 100644 index 0000000..dd9c463 --- /dev/null +++ b/CS3871/sync/makefile @@ -0,0 +1,5 @@ +sync: sync.c reader.c writer.c + cc -o sync sync.c + cc -o reader reader.c + cc -o writer writer.c + diff --git a/CS3871/sync/reader.c b/CS3871/sync/reader.c new file mode 100644 index 0000000..929eba1 --- /dev/null +++ b/CS3871/sync/reader.c @@ -0,0 +1,86 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NSEM 3 +#define KEY 52 + +char *mem; + +void quit(signum) +int signum; +{ + shmdt(mem); + exit(1); +} + +int main(argc, argv) +int argc; +char **argv; +{ + int shmid, semid, i, pid, id; + char filename[50]; + FILE *fd; + struct sembuf sb; + + if (argc < 2) { + printf("usage: reader [id]\n"); + exit(1); + } + + id = atoi(argv[1]); + + + if ((shmid = shmget(52, 1<<14, IPC_CREAT | 0666)) == -1){ + perror("shmget: shmget failed"); + exit(1); + } + + if ((mem = shmat(shmid, NULL, 0)) == (char *) -1) { + perror("shmat"); + exit(1); + } + + if ((semid = semget(shmid, NSEM, 0)) == -1) { + perror("Rsemget: "); + exit(1); + } + + signal(SIGQUIT, quit); + + sprintf(filename, "reader.%d", id); + + fd = fopen(filename, "a"); + + if (!fd) { + perror("fopen: "); + exit(1); + } + srand(time(NULL)); + + while (1) { + sb.sem_num = 0; sb.sem_op = -1; sb.sem_flg = 0; + semop(semid, &sb, 1); + + for (i = 0; i < 1<<14; i++) { + fprintf(fd, "%c", *(mem + i)); + fflush(fd); + } + fprintf(fd, "\n"); + fflush(fd); + + sb.sem_op = 1; + semop(semid, &sb, 1); + + + sleep(rand() % (id + 1)); + } +} diff --git a/CS3871/sync/sync.c b/CS3871/sync/sync.c new file mode 100644 index 0000000..23d6ba1 --- /dev/null +++ b/CS3871/sync/sync.c @@ -0,0 +1,118 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#define NSEM 3 + +union semun { + int val; + struct semid_ds *buf; + ushort *array; +}; + + +int shmid, semid; + +void quit(signum) +int signum; +{ + shmctl(shmid, IPC_RMID, NULL); + semctl(semid, IPC_RMID, 0); +} + + +int main(argc, argv) +int argc; +char **argv; +{ + int i, pid, n_read, n_write, w; + char *mem, **arg_r, **arg_w; + union semun semarg; + + if (argc < 2) { + printf("usage: sync [number readers] [number writers]\n"); + exit(1); + } + n_read = atoi(argv[1]); + n_write = atoi(argv[2]); + + if ((shmid = shmget(52, 1<<14, IPC_CREAT | 0666)) == -1){ + perror("shmget: shmget failed"); + exit(1); + } + + if ((mem = shmat(shmid, NULL, 0)) == (char *) -1) { + perror("shmat"); + exit(1); + } +printf("Sshmid: %x\n", shmid); + signal(SIGQUIT, quit); + + for (i = 0; i < 1<<14; i++) { + *(mem + i) = 0x30; + } + + if ((semid = semget(shmid, NSEM, 0666 | IPC_CREAT)) == -1) { + perror("Ssemget: "); + exit(1); + } + + semarg.val = 1; + for (i = 0; i < NSEM; i++) { + if ((semctl(semid, i, SETVAL, semarg)) == -1) { + perror("semctl: "); 
+ exit(1); + } + } + + + arg_r = malloc(sizeof(char*) * 3); + arg_w = malloc(sizeof(char*) * 3); + *arg_r = malloc(sizeof(char) * 10); + *arg_w = malloc(sizeof(char) * 10); + + *(arg_r + 1) = malloc(sizeof(char) * 50); + *(arg_w + 1) = malloc(sizeof(char) * 50); + + *arg_r = "reader"; + *arg_w = "writer"; + + *(arg_r + 2) = NULL; + *(arg_w + 2) = NULL; + + for (i = 0; i < n_read; i++){ + sprintf(*(arg_r + 1), "%d", i); + if (pid = fork()) { + /* printf("starting reader %d...\n", i); */ + } else { + int ret = execv("./reader", arg_r); + printf("exec retern %d", ret); + } + } + + for (i = 0; i < n_write; i++) { + sprintf(*(arg_w + 1), "%d", i); + if (pid = fork()) { + /* printf("starting writer %d...\n", i); */ + } else { + execvp("./writer", arg_w); + } + + } + + shmdt(mem); +printf("sync done...\n"); +/* TODO + * why is this returning 8 + */ + for (i = 0; i < (n_write + n_read); i++) { + wait(&w); + printf("\nReturned with code:%d\n", WEXITSTATUS(w)); + } + quit(); +} diff --git a/CS3871/sync/writer.c b/CS3871/sync/writer.c new file mode 100644 index 0000000..d38dd70 --- /dev/null +++ b/CS3871/sync/writer.c @@ -0,0 +1,76 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define NSEM 3 + +char *mem; + +void quit(signum) +int signum; +{ + shmdt(mem); + exit(1); +} + +int main(argc, argv) +int argc; +char **argv; +{ + int shmid, semid, i, pid, id; + char *mem; + struct sembuf sb; + + if (argc < 2) { + printf("usage: writer [id]\n"); + exit(1); + } + + id = atoi(argv[1]); + + + if ((shmid = shmget(52, 1<<14, IPC_CREAT | 0666)) == -1){ + perror("shmget: shmget failed"); + exit(1); + } + + if ((mem = shmat(shmid, NULL, 0)) == (char *) -1) { + perror("shmat"); + exit(1); + } +printf("Wshmid: %x\n", shmid); + + if ((semid = semget(shmid, NSEM, 0)) == -1) { + perror("Wsemget: "); + exit(1); + } + + signal(SIGQUIT, quit); + + srand(time(NULL)); + + while (1) { + rand() % id; + + sb.sem_num = 0; sb.sem_op = -1; sb.sem_flg = 0; + semop(semid, &sb, 1); + + for (i = 0; i < 1<<14; i++) { + mem[i]= 0x30 + id; + } + + sb.sem_op = 1; + semop(semid, &sb, 1); + + sleep(rand() % ((id * 2) + 1)); + } +} diff --git a/CS3871/timing/client.c b/CS3871/timing/client.c new file mode 100644 index 0000000..9a83a5b --- /dev/null +++ b/CS3871/timing/client.c @@ -0,0 +1,133 @@ +#include +#include +#include +#include +#include +#include +#include + +#define BUFFER_SIZE 1024 + +typedef struct number_val { + int val; + struct timeval tv; +} num; + +int sock_init(argv) +char **argv; +{ + struct sockaddr_in address, serv_addr; + int sock = 0, valread; + char buffer[1024] = {0}; + + if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) { + perror("Socket creation error\n"); + return -1; + } + + memset(&serv_addr, '0', sizeof(serv_addr)); + + serv_addr.sin_family = AF_INET; + serv_addr.sin_port = htons(atoi(argv[2])); + + if (inet_pton(AF_INET, argv[1], &serv_addr.sin_addr) <= 0) { + perror("Invalid address/Address not supported\n"); + return -1; + } + + if (connect(sock, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) { + perror("Connection Failed"); + return -1; + } + return sock; +} + +int main(argc, argv) +int argc; +char **argv; +{ + int sock = 0, valread, start_ptr = -1, end_ptr = 0, qset, i; + double avg = 0, cnt; + char buffer[1024] = {0}; + num num_buffer[BUFFER_SIZE]; + fd_set rs, ws, es; + struct timeval now, *ww, tmp; + + sock = sock_init(argv); + + ww = NULL; + + printf("Connected, waiting for numbers...\n"); + while (1) { + 
FD_ZERO(&rs); FD_ZERO(&ws); FD_ZERO(&es); + FD_SET(sock, &rs); + + qset = select(sock + 1, &rs, (fd_set *) 0, (fd_set *) 0, ww); + + gettimeofday(&now, NULL); + now.tv_sec = now.tv_sec - 60; + + ww = &tmp; + tmp.tv_sec = 0; + tmp.tv_usec = 500000; + + if (FD_ISSET(sock, &rs)) { + valread = read(sock, buffer, 1024); + for (i = 0; i < valread; i+=2) { + if (end_ptr == BUFFER_SIZE) + end_ptr = 0; + if (end_ptr != start_ptr) { + gettimeofday(&(num_buffer[end_ptr].tv), NULL); + num_buffer[end_ptr].val = atoi(buffer + i); + end_ptr++; + } else { + printf("dropping number...\n"); + } + } + } + + if (start_ptr == -1) + start_ptr = 0; + + if (start_ptr < end_ptr) { + for (i = start_ptr; i < end_ptr; i++) { + if (num_buffer[i].tv.tv_sec <= now.tv_sec) + start_ptr = i + 1; + } + } else { + for (i = start_ptr; i < BUFFER_SIZE; i++) { + num_buffer[end_ptr].val = atoi(buffer); + end_ptr++; + num_buffer[end_ptr].val = atoi(buffer); + end_ptr++; + num_buffer[end_ptr].val = atoi(buffer); + end_ptr++; + if (num_buffer[i].tv.tv_sec <= now.tv_sec) + start_ptr = i + 1; + } + + for (i = 0; i < end_ptr; i++) { + if (num_buffer[i].tv.tv_sec <= now.tv_sec) + start_ptr = i + 1; + } + + start_ptr %= BUFFER_SIZE; + } + + avg = 0; + cnt = start_ptr > end_ptr ? (BUFFER_SIZE - start_ptr + end_ptr) : (end_ptr - start_ptr); + cnt = cnt == 0 ? 10 : cnt; + + if (start_ptr < end_ptr) { + for (i = start_ptr; i < end_ptr; i++) + avg += num_buffer[i].val / cnt; + } else { + for (i = start_ptr; i < BUFFER_SIZE; i++) + avg += num_buffer[i].val / cnt; + for (i = 0; i < end_ptr; i++) + avg += num_buffer[i].val /cnt; + } + + printf("avg: %10.5f\n", avg); + } +} diff --git a/CSC2636/search/.gitignore b/CSC2636/search/.gitignore new file mode 100644 index 0000000..7523492 --- /dev/null +++ b/CSC2636/search/.gitignore @@ -0,0 +1,6 @@ +*test* +pages +index.dat +indexer +search +*.swp diff --git a/CSC2636/search/README.rst b/CSC2636/search/README.rst new file mode 100644 index 0000000..e1d14fb --- /dev/null +++ b/CSC2636/search/README.rst @@ -0,0 +1,19 @@ +============= +Search Engine +============= + +Setup +===== +In order for search.go to use the index package the directory "index" +must by copied (or linked) into a directory "search" that is in your +GOPATH. + +About +===== +Search Engine for web science class. + +See assign.rst for assignment details. + +Authors +======= +- Tucker Evans diff --git a/CSC2636/search/assign.rst b/CSC2636/search/assign.rst new file mode 100644 index 0000000..66e537e --- /dev/null +++ b/CSC2636/search/assign.rst @@ -0,0 +1,213 @@ +======================== +Project 2: Search Engine +======================== + +**CS2621– Web Science** + +*100 points* + +You are to create a web search engine that works at the command line. +To do this, you will write two Python scripts, indexer.py and +search.py. + +Indexer +======= + +Indexer.py should do the following: + +1. After performing a crawl (using your other Python script), read all + the HTML files that were stored in the “pages” directory. For each + document, extract the title and the text from the body of the page + (read the Beautiful Soup documentation to find out how). Beautiful + Soup will include the text of the page in the content of the page, + and that is OK. Beautiful Soup may also break on some pages and + include HTML as text, but we will not worry about these + exceptions or bugs. + +2. All text should be converted to lowercase and non-alphanumeric + characters should be ignored. 
So “123-456” would become “123” and + “456”, and “joe@yahoo.com” would become “joe”, “yahoo”, “com”. + Ignore the following stop words: a, an, and, are, as, at, be, by, + for, from, has, he, in, is, it, its, of, on, that, the, to, was, + were, will, with. Do not perform stemming. + +3. A single inverted index should be created for the document corpus + which maintains the document ID (numbered 1…n in order of the pages + found in the “pages” directory), a 1 or 0 if the text is found in + the title, and the term frequency from the body (normalized by the + total number of tokens in the document after removing stop words). + +4. After indexer.py has finished indexing all the web pages, it should + output the index to index.dat which looks likethis: + +:: + + arkansas + 6 0 0.022 + model + 1 0 0.309 + 3 0 0.015 + 5 1 0.001 + tuesday + 2 0 0.082 + white + 2 1 0.018 + etc… + +.. note :: + The indexed words are alphabetized, and there are 3 spaces before + sets of three numbers (each separated by a single space) which are: + doc ID, title (0 or 1), and normalized body TF (rounded to 3 decimal + places). For example, the term white was found only in document 2; + it was somewhere in the title and made up 1.8% of all the words in + the document. + +5. It may take some time for your program to run, so you should output + information about the program’s status as it indexes the crawled + pages. Outputting what file is being worked on would be helpful to + the user who is waiting for the program to finish its work. + +Search +====== + +After the index is written to index.dat, the search.py script will +allow the user to search the corpus for specific words. Here is how +it should operate: + +1. First, read the search phrase at the command line. Examples: + + .. code :: bash + + $ search.py bisons + $ search.py "landmark college" + +If no command line argument is supplied, the program should tell the +user a search term is required and terminate. Ignore any command-line +arguments after the first. + +2. Next, the program should read the index from index.dat into memory. + Note that you may want to use similar data structures used in + indexer.py, so you should write your programs in a way where you + share code without having redundant code in each script. (It’s OK + to introduce new .py files to your project.) + +3. For simplicity, all queries will be assumed to use boolean ANDs, + and we will not implement phrase search. For example, the query + landmark college should generate a boolean search for landmark AND + college, so only documents containing both terms should be + considered amatch. + +4. Remove any stop words from the query as was done when indexing the + documents. + +5. After determining which documents match the search terms, calculate + the relevancy score for each document: relevancy score = 0.9 * body + TF + 0.1 * title score Do this for each term, and compute the + average relevancy score for all terms. So if the search was for + landmark college, you would compute the score for landmark and the + score for college and compute the average to determine the overall + relevancy score. + +6. The total number of results should first be displayed. Then display + every document ID and score (out to 3 decimal places) ordered by + score, and number the results. Example: + +.. code:: bash + + Results: 4 + 1. docID, 3, score, 0.830 + 2. docID, 1, score, 0.814 + 3. docID, 5, score, 0.350 + 4. docID, 8, score, 0.108 + +**Bonus:** You can receive 5 bonus points by implementing phrase search. 
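
Steps 5 and 6 are easy to get wrong, so here is a small worked sketch of the boolean-AND match and score averaging. It is written in Go rather than the Python the assignment names (the repository's own search.go is also Go); the ``posting`` and ``score`` names are illustrative, and the index values are taken from the test data further below.

.. code:: go

    package main

    import (
        "fmt"
        "sort"
    )

    // posting mirrors one "docID title bodyTF" line of index.dat.
    type posting struct {
        title  bool
        bodyTF float64
    }

    // score applies step 5: per-term score = 0.9*bodyTF + 0.1*titleScore,
    // averaged over all query terms; only documents containing every term
    // (boolean AND) are kept.
    func score(index map[string]map[string]posting, terms []string) map[string]float64 {
        results := make(map[string]float64)
        for doc := range index[terms[0]] {
            total, ok := 0.0, true
            for _, t := range terms {
                p, found := index[t][doc]
                if !found {
                    ok = false
                    break
                }
                title := 0.0
                if p.title {
                    title = 1.0
                }
                total += 0.9*p.bodyTF + 0.1*title
            }
            if ok {
                results[doc] = total / float64(len(terms))
            }
        }
        return results
    }

    func main() {
        // Values taken from the test data in this assignment.
        index := map[string]map[string]posting{
            "test": {"a": {true, 0.200}, "b": {false, 0.400}, "c": {false, 0.500}},
            "this": {"a": {false, 0.200}, "c": {false, 0.500}},
        }
        res := score(index, []string{"test", "this"})
        docs := make([]string, 0, len(res))
        for d := range res {
            docs = append(docs, d)
        }
        sort.Slice(docs, func(i, j int) bool { return res[docs[i]] > res[docs[j]] })
        fmt.Printf("Results: %d\n", len(res))
        for i, d := range docs {
            fmt.Printf("%d. docID %s, score %.3f\n", i+1, d, res[d])
        }
    }

Run against those values, it prints ``Results: 2`` followed by doc c at 0.450 and doc a at 0.230, matching the "test this" result under Test Data.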
+So when the user searches for “landmark college”, assume they want +only documents with that exact phrase. To accomplish this, you will +need to store the positions of the terms that are stored in the +inverted index. Then use those positions to ensure the phrase matches +successive positions. + + +Zip your entire project directory and submit it +to Canvas before it is due. Make sure your output matches the +specifications precisely to avoid losing any points. If you use any +code you find in the Web, you must document the source in your +program. + +Test Data +========= + +*a.html* + +.. code:: html + + cool!!! test!!! + + this 123-456. + + +*b.html* + +.. code:: html + + + + Go Patriots! + + + And another test and test! + + + +*c.html* + +.. code:: html + + + This is a test. + + +*Inverted index:* + +.. code:: + + 123 + a 0 0.200 + 456 + a 0 0.200 + another + b 0 0.200 + cool + a 1 0.200 + patriots + b 1 0.200 + go + b 1 0.200 + test + a 1 0.200 + c 0 0.500 + b 0 0.400 + this + a 0 0.200 + c 0 0.500 + +Search for "test this" results in the following: + +:: + + Results: 2 + 1. docID c, score 0.450 + 2. docID a, score 0.230 + +Search for "test patriots go" results in the following: + +:: + + Results: 1 + 1. docID b, score 0.310 + +Search for "cool patriots" results in the following: + +:: + + Results: 0 diff --git a/CSC2636/search/index/index.go b/CSC2636/search/index/index.go new file mode 100644 index 0000000..5d8ab65 --- /dev/null +++ b/CSC2636/search/index/index.go @@ -0,0 +1,165 @@ +package index + +import "fmt" +import "os" +import "io" +import "bufio" +import "sort" +import "errors" +import "strings" +import "strconv" + +/* TODO + + - Implement Forward Creation + - Implement Inverted from Forward + - Switch Indexer.go over to this package + +/********* + * Types * + *********/ + +type F_info struct { + Word string; + In_title bool; + Freq float64; +}; + +type I_info struct { + Doc string; + In_title bool; + Freq float64; +}; + +type F_entry struct{ + This *F_info; + Next *F_entry; +}; + +type I_entry struct{ + This *I_info; + Next *I_entry; +}; + +type F_index map[string]*F_entry; +type I_index map[string]*I_entry; + +type sortInverted struct{ + w string; + root *I_entry; +}; + + +/*************************** + * Forward Index Funcitons * + ***************************/ + +func NewForwardEntryStrings(text, title []string) (*F_entry, error) { + return nil, errors.New("not implemented"); +} + +/**************************** + * Inverted Index Functions * + ****************************/ + +func new_I_info() *I_info{ + return &I_info{"", false, 0.0}; +} + +func NewInvertedIndexFromFile(fname string) (I_index, error) { + var fd *os.File; + var br *bufio.Reader; + var err error; + var buf []byte; + var tmp *I_info; + var cur *I_entry; + var index I_index; + var word string + var info []string; + + fd, err = os.Open(fname); + if err != nil { + return nil, err; + } + + br = bufio.NewReader(fd); + if br == nil { + return nil, errors.New("Could not initialize reader"); + } + + index = make(I_index); + + for buf, err = br.ReadBytes('\n'); err != io.EOF; buf, err = br.ReadBytes('\n'){ + tmp = new_I_info(); + if err != nil { + return nil, err; + } + if buf[0] != '\t' { + word = strings.TrimSpace(string(buf)); + } else { + info = strings.Fields(string(buf)); + tmp.Doc = info[0]; + tmp.In_title = (info[1] == "1"); + tmp.Freq, _ = strconv.ParseFloat(info[2], 32); + if (index[word] == nil) { + index[word] = &I_entry{This: tmp, Next: nil}; + } else { + cur = index[word]; + for cur.Next != nil { + cur 
= cur.Next; + } + cur.Next = &I_entry{This: tmp, Next: nil}; + } + } + } + + return index, nil; +} + +func NewInvertedFromForward(f F_index) (I_index, error) { + return nil, errors.New("not implemented"); + +} + +func (x I_index) PrintToFile(fd *os.File) error{ + var i int; + var cur *I_entry; + var index []sortInverted; + + index = x.sortIndex(); + + for i = 0; i < len(index); i++ { + fmt.Fprintf(fd, "%s\n", index[i].w); + for cur = index[i].root; cur != nil; cur = cur.Next { + fmt.Fprintf(fd, "\t%s %d %.3f\n", cur.This.Doc, toInt(cur.This.In_title), cur.This.Freq); + } + } + return nil; +} + +func toInt(t bool) int{ + if (t){ + return 1; + } + return 0; +} + +func (unsort I_index) sortIndex() []sortInverted { + var i int; + var sorted []sortInverted; + + sorted = make([]sortInverted, len(unsort)); + + i = 0; + for k, v := range unsort { + sorted[i].w = k; + sorted[i].root = v; + i++; + } + + sort.Slice(sorted, func(i, j int) bool { + return sorted[i].w < sorted[j].w; + }); + + return sorted +} diff --git a/CSC2636/search/indexer.go b/CSC2636/search/indexer.go new file mode 100644 index 0000000..d95f126 --- /dev/null +++ b/CSC2636/search/indexer.go @@ -0,0 +1,402 @@ +package main + +import "os" +import "sort" +import "golang.org/x/net/html" +import "log" +import "fmt" +import "github.com/PuerkitoBio/goquery" +import "github.com/kennygrant/sanitize" +import "strings" +import "flag" +import "errors" +import "regexp" + +type document struct { + fname string; + title []string; + text []string; + length int; +} + +type index struct { + doc *document; + title bool; + freq int; +} + +type wordSort struct { + w string; + root *wordList; +} + +type wordList struct { + this *index + next *wordList +} + +var r, nonAN *regexp.Regexp; +var stopWords []*regexp.Regexp; + + +func newDocument() *document { + return &document{"" , nil, nil, 0}; +} + +func RemoveNode(r, rn *html.Node) { + var found bool; + var n, item *html.Node; + var nodes map[int]*html.Node; + var i, j int; + + found = false; + nodes = make(map[int]*html.Node); + + for n = r.FirstChild; n != nil; n = n.NextSibling { + if n == rn { + found = true; + n.Parent.RemoveChild(n); + } + + nodes[i] = n; + i++; + } + + if !found { + for j = 0; j < i; j++ { + item = nodes[j]; + RemoveNode(item, rn); + } + } +} +func RemoveTag(doc *goquery.Selection, tag string) { + doc.Find(tag).Each(func(i int, s *goquery.Selection) { + RemoveNode(doc.Get(0), s.Get(0)); + }); +} + +func logReg(h []byte) []byte { + log.Printf("RegExp: %s", h); + return h; +} + +func parseDoc(fd *os.File, f_info os.FileInfo) (*document, error) { + var err error; + var text, t_text string; + var doc *goquery.Document; + var body, title *goquery.Selection; + var r_doc *document; + var i int; + + doc, err = goquery.NewDocumentFromReader(fd); + if err != nil { + log.Printf("goquery error: %s\n", err); + return nil, errors.New("Can't create goquery documnt"); + } + + body = doc.Find("body"); + RemoveTag(body, "script"); + RemoveTag(body, "noscript"); + + title = doc.Find("title"); + + //TODO add error detection + text, err = body.Html(); + t_text, err = title.Html(); + + + text = r.ReplaceAllString(text, "> <"); + t_text = r.ReplaceAllString(t_text, "> <"); + + text = sanitize.HTML(text); + t_text = sanitize.HTML(t_text); + + text = strings.ToLower(text); + t_text = strings.ToLower(t_text); + + text = nonAN.ReplaceAllString(text, " "); + t_text = nonAN.ReplaceAllString(t_text, " "); + + + for i = 0; i < len(stopWords); i++ { + text = stopWords[i].ReplaceAllString(text, " "); + t_text = 
stopWords[i].ReplaceAllString(t_text, " "); + } + r_doc = newDocument(); + + r_doc.fname = f_info.Name(); + r_doc.text = strings.Fields(text); + r_doc.title = strings.Fields(t_text); + r_doc.length = len(r_doc.text) + len(r_doc.title); + + return r_doc, nil; +} +func boolToInt(t bool) int { + if t { + return 1; + } + return 0; +} + +func printIndex(words []wordSort, fd *os.File) { + var i int; + var cur *wordList; + var fname string; + var t int; + var freq float64; + + for i = 0; i < len(words); i++ { + fmt.Fprintf(fd, "%s\n", words[i].w); + for cur = words[i].root; cur != nil; cur = cur.next { + fname = cur.this.doc.fname; + t = boolToInt(cur.this.title); + freq = float64(cur.this.freq) / float64(cur.this.doc.length); + + fmt.Fprintf(fd,"\t%s %d %.3f\n", fname, t, freq); + } + } +} + +func init() { + var err error; + log.SetOutput(os.Stderr); + r, err = regexp.Compile("><"); + if err != nil { + panic(err); + } + nonAN, err = regexp.Compile("[^a-zA-Z0-9]+"); + if err != nil { + panic(err); + } + //TODO add func to read in stop words from a file; + stopWords = make([]*regexp.Regexp, 26) + if err != nil { + panic(err); + } + stopWords[0], err = regexp.Compile("\\W+and\\W+"); + if err != nil { + panic(err); + } + stopWords[1], err = regexp.Compile("\\W+a\\W+"); + if err != nil { + panic(err); + } + stopWords[2], err = regexp.Compile("\\W+an\\W+"); + if err != nil { + panic(err); + } + stopWords[3], err = regexp.Compile("\\W+and\\W+"); + if err != nil { + panic(err); + } + stopWords[4], err = regexp.Compile("\\W+are\\W+"); + if err != nil { + panic(err); + } + stopWords[5], err = regexp.Compile("\\W+as\\W+"); + if err != nil { + panic(err); + } + stopWords[6], err = regexp.Compile("\\W+at\\W+"); + if err != nil { + panic(err); + } + stopWords[7], err = regexp.Compile("\\W+be\\W+"); + if err != nil { + panic(err); + } + stopWords[8], err = regexp.Compile("\\W+by\\W+"); + if err != nil { + panic(err); + } + stopWords[9], err = regexp.Compile("\\W+for\\W+"); + if err != nil { + panic(err); + } + stopWords[10], err = regexp.Compile("\\W+from\\W+"); + if err != nil { + panic(err); + } + stopWords[11], err = regexp.Compile("\\W+has\\W+"); + if err != nil { + panic(err); + } + stopWords[12], err = regexp.Compile("\\W+he\\W+"); + if err != nil { + panic(err); + } + stopWords[13], err = regexp.Compile("\\W+in\\W+"); + if err != nil { + panic(err); + } + stopWords[14], err = regexp.Compile("\\W+is\\W+"); + if err != nil { + panic(err); + } + stopWords[15], err = regexp.Compile("\\W+it\\W+"); + if err != nil { + panic(err); + } + stopWords[16], err = regexp.Compile("\\W+its\\W+"); + if err != nil { + panic(err); + } + stopWords[17], err = regexp.Compile("\\W+of\\W+"); + if err != nil { + panic(err); + } + stopWords[18], err = regexp.Compile("\\W+on\\W+"); + if err != nil { + panic(err); + } + stopWords[19], err = regexp.Compile("\\W+that\\W+"); + if err != nil { + panic(err); + } + stopWords[20], err = regexp.Compile("\\W+the\\W+"); + if err != nil { + panic(err); + } + stopWords[21], err = regexp.Compile("\\W+to\\W+"); + if err != nil { + panic(err); + } + stopWords[22], err = regexp.Compile("\\W+was\\W+"); + if err != nil { + panic(err); + } + stopWords[23], err = regexp.Compile("\\W+were\\W+"); + if err != nil { + panic(err); + } + stopWords[24], err = regexp.Compile("\\W+will\\W+"); + if err != nil { + panic(err); + } + stopWords[25], err = regexp.Compile("\\W+with\\W+"); + if err != nil { + panic(err); + } +} + +func main() { + // var words map[string]index + var p_dir, w, fname string; + var err 
error; + var i, j int; + var words map[string]*wordList; + var cur *wordList; + var tmp *index; + var sorted []wordSort; + + var files []os.FileInfo; + var dir, fd *os.File; + var dir_info, fd_info os.FileInfo; + var dir_mode os.FileMode; + + var doc *document; + + flag.StringVar(&p_dir, "d", "./pages", "pages directory"); + + flag.Parse(); + + words = make(map[string]*wordList); + + dir, err = os.Open(p_dir); + if err != nil { + log.Printf("Error accessing \"%s\":\t%s\n", p_dir, err); + os.Exit(1); + } + + dir_info, err = dir.Stat(); + dir_mode = dir_info.Mode(); + + if !dir_mode.IsDir() { + log.Printf("\"%s\" is not a directory\n", p_dir); + os.Exit(1); + } + + files, err = dir.Readdir(0); + if err != nil { + log.Printf("Error reading %s\n", p_dir); + os.Exit(1); + } + + for i = 0; i < len(files); i++ { + fd, err = os.Open(fmt.Sprintf("%s/%s", dir_info.Name(), files[i].Name())); + fd_info, err = fd.Stat(); + if err != nil { + log.Printf("Error getting info\n"); + os.Exit(1); + } + fname = fd_info.Name(); + + if err != nil { + log.Printf("Error reading %s/%s\n", dir_info.Name(), files[i].Name()); + } else { + fmt.Printf("Indexing %s...\n", fname); + doc, err = parseDoc(fd, fd_info); + if err != nil { + log.Printf("Error parsing %s/%s\n", dir_info.Name(), files[i].Name()); + } else { + /* Text */ + for j = 0; j < len(doc.text); j++ { + w = strings.ToLower(doc.text[j]); + + if words[w] == nil{ + tmp = &index{doc: doc, title: false, freq: 0}; + words[w] = &wordList{this: tmp, next: nil}; + } + + for cur = words[w];cur.next != nil && cur.this.doc.fname != fname; cur = cur.next{} + + if cur.this.doc.fname == fname { + cur.this.freq++ + } else if cur.next == nil { + tmp = &index{doc: doc, title: false, freq: 1}; + cur.next = &wordList{this: tmp, next: nil}; + } else { + panic(fmt.Sprintf("%v", cur)); + } + } + /* Title */ + for j = 0; j < len(doc.title); j++ { + w = strings.ToLower(doc.title[j]); + + if words[w] == nil{ + tmp = &index{doc: doc, title: true, freq: 0}; + words[w] = &wordList{this: tmp, next: nil}; + } + + for cur = words[w];cur.next != nil && cur.this.doc.fname != fname; cur = cur.next{} + + if cur.this.doc.fname == fname { + cur.this.title = true; + cur.this.freq++; + } else if cur.next == nil { + tmp = &index{doc: doc, title: true, freq: 1}; + cur.next = &wordList{this: tmp, next: nil}; + } else { + panic(fmt.Sprintf("%v", cur)); + } + } + } + } + fd.Close(); + } + sorted = make([]wordSort, len(words)); + i = 0; + for k,v := range words { + sorted[i].w = k; + sorted[i].root = v; + i++; + } + + sort.Slice(sorted, func(i, j int) bool { + return sorted[i].w < sorted[j].w; + }); + + fd,_ = os.Create("index.dat"); + printIndex(sorted, fd); + fd.Close(); +} diff --git a/CSC2636/search/search.go b/CSC2636/search/search.go new file mode 100644 index 0000000..c144055 --- /dev/null +++ b/CSC2636/search/search.go @@ -0,0 +1,144 @@ +/************************************************ + * README * + * In order for search/index to be accessible * + * you must link this folder (search) into your * + * GOPATH * + ************************************************/ + + +package main + +import "search/index" +import "os" +import "fmt" +import "sort" +import "flag" +import "strings" + +type res struct { + doc string; + score float64; +}; + +func main() { + var init_index, sIndex index.I_index; + var tmp, results, root *index.I_entry; + var tmp_score float64; + var scores map[string]map[string]float64; // scores[doc][word] == score + var i,j int; + var searchBool, perWord, docAdded map[string]bool; 
//map[doc]bool + var resultSort []res; + var err error; + var fname, s string; + var search []string; + + flag.StringVar(&fname, "f", "./index.dat", "Index file"); + flag.StringVar(&s, "s", "" , "Search phrase"); + + flag.Parse(); + if len(s) == 0 { + fmt.Printf("Usage: search -s \"search phrase\" [-f index_file]"); + os.Exit(1); + } else { + search = strings.Fields(s); + } + + scores = make(map[string]map[string]float64); + searchBool = make(map[string]bool); + perWord = make(map[string]bool); + docAdded = make(map[string]bool); + + + sIndex = make(index.I_index); + + + + init_index, err = index.NewInvertedIndexFromFile(fname); + if err != nil { + panic(err) + } + for i = 0; i < len(search); i++ { + sIndex[search[i]] = init_index[search[i]] + } + + for _, v := range sIndex { + for tmp = v; tmp != nil; tmp = tmp.Next { + searchBool[tmp.This.Doc] = true; + scores[tmp.This.Doc] = make(map[string]float64); + } + } + + for _, v := range sIndex { + for tmp = v; tmp != nil; tmp = tmp.Next { + perWord[tmp.This.Doc] = true; + } + for d := range searchBool { + if !perWord[d] { + searchBool[d] = false; + } + } + perWord = make(map[string]bool); + } + + for k, v := range sIndex { + for tmp = v; tmp != nil; tmp = tmp.Next { + if searchBool[tmp.This.Doc] { + if tmp.This.In_title { + tmp_score = 1.0; + } else { + tmp_score = 0.0; + } + + scores[tmp.This.Doc][k] = (0.9 * tmp.This.Freq) + (0.1 * tmp_score); + } + } + + } + + i = 0; + results = &index.I_entry{nil, nil} + root = &index.I_entry{nil, nil}; + results.Next = root; + + j = 0; + + for _ ,v := range sIndex { + for tmp = v; tmp != nil; tmp = tmp.Next { + if (searchBool[tmp.This.Doc]) { + root.This = tmp.This; + docAdded[root.This.Doc] = false; + root.Next = &index.I_entry{nil, nil}; + root = root.Next; + j++ + } + } + } + + resultSort = make([]res, j); + + i = 0; + for root = results.Next; root.Next != nil; root = root.Next { + if (!docAdded[root.This.Doc]) { + j = 0; + tmp_score = 0; + for _ ,v := range scores[root.This.Doc] { + tmp_score += v; + j++; + } + tmp_score /= float64(j); + resultSort[i] = res{root.This.Doc, tmp_score}; + docAdded[root.This.Doc] = true; + i++; + } + } + resultSort = resultSort[:i]; + + sort.Slice(resultSort, func(i, j int) bool { + return resultSort[i].score > resultSort[j].score; + }); + + fmt.Printf("Results: %d\n", len(resultSort)); + for i = 0; i < len(resultSort); i++ { + fmt.Printf("\t%d. Doc: %s, Score: %.3f\n", i, resultSort[i].doc, resultSort[i].score); + } +} diff --git a/CSC2636/webCrawler2/README.rst b/CSC2636/webCrawler2/README.rst new file mode 100644 index 0000000..1168fb9 --- /dev/null +++ b/CSC2636/webCrawler2/README.rst @@ -0,0 +1,13 @@ +=========== +Web Crawler +=========== + +Web crawler for Web Science class + +Dependencies +============ +- `GoQuery `_. 
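
As a quick orientation before the crawler source, this is a minimal GoQuery sketch of the fetch-and-extract-links step; the URL is a placeholder, and crawler.go below builds on the same ``NewDocument``, ``Find("body a")``, and ``Attr("href")`` calls.

.. code:: go

    package main

    import (
        "fmt"

        "github.com/PuerkitoBio/goquery"
    )

    // Fetch a page, walk its anchors, and print each href.
    func main() {
        doc, err := goquery.NewDocument("https://example.com/")
        if err != nil {
            panic(err)
        }
        doc.Find("body a").Each(func(i int, s *goquery.Selection) {
            if href, ok := s.Attr("href"); ok {
                fmt.Println(href)
            }
        })
    }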
+ +Authors +======= +- Tucker Evans diff --git a/CSC2636/webCrawler2/crawler.go b/CSC2636/webCrawler2/crawler.go new file mode 100644 index 0000000..5c4dba6 --- /dev/null +++ b/CSC2636/webCrawler2/crawler.go @@ -0,0 +1,164 @@ +package main + +import "crypto/md5" +import "fmt" +import "github.com/PuerkitoBio/goquery" +import "log" +import "net/url" +import "os" +import "strconv" +import "strings" +import "sync" +import "sync/atomic" +import "time" + +type link struct { + u *url.URL + depth int +} + +var mutex *sync.Mutex +var Prev map[string]bool +var base string +var links_visited uint64 = 0 + +func validLink(s string) bool { + return true + //return (strings.HasSuffix(s, ".html") || strings.HasSuffix(s, "/") || strings.HasSuffix(s, "\\")) +} + +func addLinks(doc *goquery.Document, jobs chan link, current link, depth int, worker_id int) { + doc.Find("body a").Each(func(index int, item *goquery.Selection) { + link_s, _ := item.Attr("href") + + d := depth + 1 + + u, err := url.Parse(link_s) + if err != nil { + panic(err) + } + + if !u.IsAbs() { + u = current.u.ResolveReference(u) + } + if strings.Contains(u.String(), base) && validLink(u.String()) { + mutex.Lock() + if !Prev[u.String()] { + jobs <- link{u, d} + Prev[u.String()] = true + } + mutex.Unlock() + } + }) +} + +func consume(doc *goquery.Document, url link, worker_id int) { + f, _ := os.Create(fmt.Sprintf("./pages/%x", md5.Sum([]byte(url.u.String())))) + s, _ := doc.Html() + f.Write([]byte(s)) +} + +func worker(done chan bool, jobs chan link, depth int, id int, total uint64) { + for { + x := atomic.LoadUint64(&links_visited) + if x >= total { + done <- true + return + } + + atomic.AddUint64(&links_visited, 1) + select { + case j := <-jobs: + if j.depth < depth { + doc, err := goquery.NewDocument(j.u.String()) + if err != nil { + log.Print("Error Reading Document: " + j.u.String() + err.Error()) + break + } + + fmt.Printf("worker %d Working on %s...\n", id, j.u.String()) + + consume(doc, j, id) + addLinks(doc, jobs, j, j.depth, id) + } + case <-time.After(time.Second * 10): + fmt.Printf("Worker %d done\n", id) + done <- true + return + } + } +} + +func init() { + mutex = &sync.Mutex{} + Prev = make(map[string]bool) + var err error + + fi, err := os.Lstat("./pages"); + if err != nil { + fmt.Printf("INIT ERROR: %s\n", err); + } + + if (fi == nil) { + os.Mkdir("./pages", 0755); + } else if (fi.Mode().IsRegular()) { + panic("pages is not a valid directory\n") + } + +} + +func main() { + var d, w, b int + var t uint64 + + if len(os.Args) < 5 { + fmt.Printf("usage: crawler url depth max_links workers\n") + panic("test") + } + + base = strings.TrimPrefix(os.Args[1], "http://www.") + base = strings.TrimPrefix(base, "https://www.") + if base == os.Args[1] { + panic(base) + } + + d, _ = strconv.Atoi(os.Args[2]) + b, _ = (strconv.Atoi(os.Args[3])) + t = uint64(b) + b, _ = (strconv.Atoi(os.Args[3])) + t = uint64(b) + w, _ = strconv.Atoi(os.Args[4]) + + jobs := make(chan link, 1024*1024) + done := make(chan bool) + + u, err := url.Parse(os.Args[1]) + if err != nil { + panic(err) + } + + if !u.IsAbs() { + panic("Cannot start with relative url") + } + jobs <- link{u, 0} + + //send first job + + for i := 0; i < w; i++ { + go worker(done, jobs, d, i, t) + } + + for i := 0; i < w; { + select { + case <-done: + i++ + case <-time.After(1 * time.Second): + if len(jobs) == (1024 * 1024) { + i = w + } + } + } + + close(done) + close(jobs) +} diff --git a/alarm/alarm.asm b/alarm/alarm.asm deleted file mode 100644 index dd094ed..0000000 --- a/alarm/alarm.asm 
+++ /dev/null @@ -1,65 +0,0 @@ -; -;30h-39h hold the 7seg bit patterns - mov 30h, #11000000B - mov 31h, #11111001B - mov 32h, #10100100B - mov 33h, #10110000B - mov 34h, #10011001B - mov 35h, #10010010B - mov 36h, #10000010B - mov 37h, #10000111B - mov 38h, #10000000B - mov 39h, #10011000B - - mov -start: - - - -jmp start -;40h-42h hold the 3 digits to display - -display: - mov acc, 50h - mov b, #1000 - div ab - mov 40h, acc - mov acc, b - mov b, #100 - div ab - mov 41h, acc; save 100's digit - mov acc, b ;put remainder in a - mov b, #10 ;find 10's & 1's digit - div ab - mov 42h, acc ;save 10's digit - mov 43h, b ;save 1's digit - - - -;display: - mov r1, #40h ;digits[0] - -loop: ;for each digit - mov acc, #30h - mov r3, acc ;save acc;next blob: - -;safe if you don't care about the -;bits of P3 other than p3.3 & p3.4 - mov a, r1 ;picks which - cpl a ;7seg to use p3.3 & - anl a, #03h ;p3.4 are a func - rl a ;of the low2 bits of - rl a ;the addr where the - rl a ;digits live - mov p1, #0ffh;undraw previous - mov p3, a ;set new 7seg - mov a, r3 ;restore acc - -; p1 = pattern[digit[i]] - add a, @r1 - mov r0, acc - mov p1, @r0 - inc r1 -; mov p1, #0ffh - cjne r1, #44h, loop - ret diff --git a/alarm/assign.rst b/alarm/assign.rst deleted file mode 100644 index f9c802a..0000000 --- a/alarm/assign.rst +++ /dev/null @@ -1,25 +0,0 @@ -=========== -Alarm clock -=========== - -Write an 8051 program which implements an alarm clock. This project will -have two phases, the first of which is described here. The alarm clock -runs all the time, with the current time displayed on the four 7-segment -displays. In this first phase your clock needs to: - 1. use timers to keep time updated to the second - 2. switch between 12 hour and 24 hour modes based switch bank.0 (0==12 - hr, 1==24hr) - 3. if you are in 12 hour mode, use the decimal point on the last - 7-segment display to indicate AM or PM (off = AM, on == PM) - 4. switch between display of hours:minutes and minutes:seconds based on - switch bank.1 (0==hours:minutes, 1==minutes:seconds) - 5. keep the decimal point of the second 7-segment display lit as a - visual separator - -AM/PM should remain indicated regardless of which display mode you are -using. 
- -You do not (yet) need to be able to: - - set the time (pick a starting value) - - set an alarm time - - perform an alarm diff --git a/filesystem/README.rst b/filesystem/README.rst deleted file mode 100644 index e69de29..0000000 diff --git a/filesystem/disk.h b/filesystem/disk.h deleted file mode 100644 index 7bf6ad0..0000000 --- a/filesystem/disk.h +++ /dev/null @@ -1,17 +0,0 @@ -#include -#include -#include - -#define TRACKS 128 -#define SECTORS 4096 - -/* 10ms per track change */ -/* 10ms/SECTORS per unit sector distance > 1 */ - -void dinit(); - -void rsector(int t,int s,unsigned char *b); - -void wsector(int t,int s,unsigned char *b); - - diff --git a/filesystem/disk.o b/filesystem/disk.o deleted file mode 100644 index b1a36f8..0000000 Binary files a/filesystem/disk.o and /dev/null differ diff --git a/filesystem/tfs.c b/filesystem/tfs.c deleted file mode 100644 index fe2d94c..0000000 --- a/filesystem/tfs.c +++ /dev/null @@ -1,352 +0,0 @@ -#include -#include -#include -#include -#include "disk.h" - -#define MAX_INODES 1000 -#define INODE_START (TRACKS * SECTORS) / (8 * 512) -#define MAX_FILES 50 - -struct block_ptr { - char track; - short sector; -}; -struct blockll{ - struct block_ptr data; - struct blockll *next; -}; - -struct meta_data{ - char name[8]; - int size; - int read; - int write; - int create; -}; - -struct inode { - struct meta_data info; - struct block_ptr data[20]; -}; - - -struct inode_list{ - struct inode *node; - struct inode_list *next; -}; - -struct file { - struct inode *node; - int mode; - int next_sec; - int free; -}; - - -int inode_list_size = 0; -struct inode_list *root, *end; - -char bitmap[TRACKS][SECTORS/8]; - -struct file files[MAX_FILES]; -int size; - - - -int check_bitmap(t,s) -int t,s; -{ - char tmp; - tmp = bitmap[t][s/8]; - tmp &= (1 << (s % 8)); - return (int) tmp; -} - -void set_bitmap(t,s) -int t,s; -{ - bitmap[t][s/8] |= (1 << (s % 8)); - return; -} - -void print_bitmap() -{ - int i,j; - for(i = 0; i < 128; i++){ - - printf("\n%4d ", i); - for (j = 0; j < 4096/8; j++) { - printf("%02x", bitmap[i][j]); - if (j %31 == 0) { - printf("\n%4d ",i); - } - } - } -} - - -/* TODO - * Implement inode table as binary tree to speedup searches - */ -struct inode* inode_search(name) -char *name; -{ - if (strcmp(name,"") == 0) { - return -1; - } - int i; - struct inode_list *tmp = root; - - for(i = 0; i < MAX_INODES && i < inode_list_size; i++){ - tmp = tmp->next; - if(strcmp(name, tmp->node->info.name) == 0) - return tmp->node; - } - return -2; -} - -struct blockll* get_blocks(size) -int size; -{ - int i, t, s; - struct blockll *root, *current = malloc(sizeof(struct blockll)); - root = current; - - for (i = 0; size > 0 && i < (4096 * 128); i++) { - t = i / 4096; - s = i % 4096; - - if (!check_bitmap(t, s)) { - current->next = malloc(sizeof(struct blockll)); - current = current->next; - current-> next = NULL; - current->data.track = (char) t; - current->data.sector = (short) s; - - set_bitmap(t,s); - size-= 512; - } - } - - return i <(4096 * 128) ? 
root : NULL; -} - -struct inode_list* inode_create(name) -char *name; -{ - struct timeval *tmp_time = malloc(sizeof(struct timeval)); - - struct inode_list *tmp = malloc(sizeof(struct inode_list)); - struct inode *tmp_node = malloc(sizeof(struct inode)); - - - tmp->node = tmp_node; - - memcpy(&(tmp->node->info.name), name, strlen(name)); - - gettimeofday(tmp_time, NULL); - - tmp->node->info.create = tmp_time->tv_sec; - tmp->node->info.read = tmp_time->tv_sec; - tmp->node->info.write = tmp_time->tv_sec; - - end->next = tmp; - end = tmp; - inode_list_size++; - - return tmp; -} - -int inode_init() -{ - int n = MAX_INODES / 4; - int i; - char *ptr; - struct inode_list *tmp; - - if (MAX_INODES % 4 > 0) - n++; - - char *buf = malloc(512 * n); - - for (i =0; i < n; i++) { - rsector(0, i, buf + (512 * i)); - } - ptr = buf; - - tmp = root; - - for(i=0; i< MAX_INODES; i++) { - tmp->next = malloc(sizeof(struct inode_list)); - memcpy(&tmp->node, ptr, 64); - ptr += 64; - tmp = tmp->next; - inode_list_size++; - } -} - -/*save inodes to first n sectors on disk*/ -void inode_save() -{ - int i, j; - char *buf = malloc(512); - struct inode_list *tmp = root; - - for (i = 0; i < MAX_INODES && tmp->next;i++) { - for (j = 0; j < 4; j++){ - tmp = tmp->next; - memcpy(buf + j, tmp->node, sizeof(struct inode)); - } - wsector(0, INODE_START + i, buf); - } -} - -struct inode* inode_from_fd(fd) -int fd; -{ - int i; - struct inode_list *tmp = root; - - for (i = 0; i < fd; i++) { - tmp = tmp->next; - } - - return tmp->node; -} - -int find_fd() -{ - int i; - for (i = 0; i < size; i++) { - if (files[i].free) - return i; - } -} - -int tfs_init() -{ - int i; - root = malloc(sizeof(struct inode_list)); - end = root; - - dinit(); - /* - * - * has issue if inodes have not been written to disk i.e. 
first run - inode_init(); - */ - for (i = 0; i < MAX_FILES; i++) { - files[i].free = 1; - } -} - - -int open(fname, mode) -char *fname, *mode; -{ - struct inode *fnode = inode_search(fname); - int fd; - - if (fnode == -1) - return -1; - - if (fnode == -2){ - fnode = inode_create(fname)->node; - } - fd = find_fd(); - - files[fd].node = fnode; - files[fd].mode = *mode; - files[fd].next_sec = 0; - files[fd].free = 0; - size++; - - return fd; -} - -int close(fd) -int fd; -{ - if (files[fd].free) - return -1; - - files[fd].free = 1; - return 1; -} - -int read(fd, buf) -int fd; -char *buf; -{ - if (files[fd].free || files[fd].mode || (files[fd].next_sec == 20)) - return -1; - - - rsector(files[fd].node->data[files[fd].next_sec].track, files[fd].node->data[files[fd].next_sec].sector, buf); - files[fd].next_sec++; - return 512; -} - -int write(fd, buf) -int fd; -char *buf; -{ - if ((files[fd].next_sec == 20) || !files[fd].mode) - return 0; - - struct blockll *tmp = get_blocks(500); - files[fd].node->data[files[fd].next_sec].track = tmp->data.track; - files[fd].node->data[files[fd].next_sec].sector = tmp->data.sector; - - wsector(files[fd].node->data[files[fd].next_sec].track, files[fd].node->data[files[fd].next_sec].sector, buf); - return 1; -} - -int ulink(fname) -char *fname; -{ - struct inode_list *tmp = root; - struct inode *d; - int i; - - for(i = 0; i < MAX_INODES && i < inode_list_size; i++){ - tmp = tmp->next; - if(strcmp(fname, tmp->next->node->info.name) == 0) - break;; - } - - d = tmp->next->node; - tmp->next = tmp->next->next; - free(d); - return 1; - } - -int main() -{ - tfs_init(); - - /* - *Test Writing - */ - int mode = 1; - int fd = open("test", &mode); - - char buf[512]; - memcpy(&buf, "Hello Filesystem", strlen("Hello Filesystem")); - - int test = write(fd, &buf); - close(fd); - - /* - *Test reading - */ - mode = 0; - fd = open("test", &mode); - char buf2[512]; - read(fd, &buf2); - printf("wrote: %s\n", buf); - - printf("read: %s\n", buf2); - -} diff --git a/jobScheduler/cpuScheduleTable.c b/jobScheduler/cpuScheduleTable.c deleted file mode 100644 index 5d2256a..0000000 --- a/jobScheduler/cpuScheduleTable.c +++ /dev/null @@ -1,224 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#define PROCESS_COUNT 4 -#define time_calc(x, y) ((y.tv_sec - x.tv_sec) * 1000000) + (y.tv_usec - x.tv_usec) - - -/* TODO -add heap for priority -compute priority -*/ - -int go=1; -int turn=0; -float v; -float throughput; -struct timeval start_time; -int turns; -int completed_jobs; - -struct process { - int virgin; - int (*proc)(int); - int turnaround[9]; - struct timeval start_t; - int runs; - float save; - int turn; -}; - -struct process pt[PROCESS_COUNT]; /*process table*/ - -void -cpusched(signum) -int signum; -{ - pt[turn].virgin=0; - pt[turn].save=v; - go=0; -} - -int -f0(x) -int x; -{ - register int i=0; - register int t; - signal(SIGUSR1, cpusched); - if (!x) goto start; - v=0.0; - -start: - while (go && (v > -200.0)) { - printf("0");fflush(stdout); - t = (rand() % 1024); - v -= 2.0; - if ((v>0.0) || ((((int) v) % 2) == -1)) { - printf("f0, found odd or positive, v= %f\n", v); - exit(1); - } - usleep(t*100); - } - if (v <= -200.0) pt[0].virgin=1; - go=1; -} - -int -f1(x) -int x; -{ - register int i=0; - register int t; - if (!x) goto start2; - v= -1.0; - -start2: - while (go && (v > -401.0)) { - printf("1");fflush(stdout); - t = (rand() % 2048); - v -= 2.0; - if ((v>0.0) || ((((int) v) % 2) != -1)) { - printf("f1, found even or positive\n"); - exit(1); - } - 
usleep(t*100); - } - if (v <= -401.0) pt[1].virgin=1; - go=1; -} - -int -f2(x) -int x; -{ - register int i=0; - register int t; - if (!x) goto start3; - v= 1.0; - -start3: - while (go) { - printf("2");fflush(stdout); - t = (rand() % 4096); - v += 2.0; - if ((v<0.0) || ((((int) v) % 2) != 1)) { - printf("f2, found even or negative\n"); - exit(1); - } - usleep(t*100); - } - go=1; -} - -int -f3(x) -int x; -{ - register int i=0; - register int t; - if (!x) goto start4; - v= 0.0; - -start4: - while (go) { - printf("3");fflush(stdout); - t = (rand() % 4096); - v += 2.0; - if ((v<0.0) || ((((int) v) % 2) == 1)) { - printf("f3, found odd or negative\n"); - exit(1); - } - usleep(t*100); - } - go=1; -} - -void turnaround_calc(p, n) -struct process *p; -int n; -{ - int i; - p->turnaround[8] = 0; - for (i = 0; i < n; i++) { - p->turnaround[8] += p->turnaround[i] / n; - } - return; -} - -int main(argc, argv, envp) -int argc; -char **argv, **envp; -{ - int pid, i, last; - struct timeval end_t; - gettimeofday(&start_time, NULL); - - for (i = 0; i < PROCESS_COUNT; i++) { - pt[i].virgin = 1; - } - - pt[0].proc=f0; - pt[1].proc=f1; - pt[2].proc=f2; - pt[3].proc=f3; - - signal(SIGUSR1, cpusched); - if (pid=fork()) { - while (1) { - go = 1; - sleep(5); - if (go) - kill(pid, SIGUSR1); - } - } else { - while (1) { - printf("turn= %d\n", ++turns); - v=pt[turn].save; - if (pt[turn].virgin) - gettimeofday(&pt[turn].start_t, NULL); - - pt[turn].proc(pt[turn].virgin); - - gettimeofday(&end_t, NULL); - - if (pt[turn].virgin) { - pt[turn].turnaround[pt[turn].runs % 8] = time_calc(pt[turn].start_t, end_t); - turnaround_calc(&pt[turn], (pt[turn].runs < 8 ? pt[turn].runs + 1 : 8)); - pt[turn].runs++; - completed_jobs++; - kill(getppid(), SIGUSR1); - } else if (pt[turn].runs == 0){ - pt[turn].turnaround[8] = (time_calc(pt[turn].start_t, end_t)) / 2; - } - - throughput = completed_jobs / (float)(end_t.tv_sec - start_time.tv_sec); - pt[turn].turn++; - - if (turns == 1000) { - kill(getppid(), 9); - exit(1); - } - - printf("\n"); - - for (i = 0; i < 4; i++){ - printf("\t[%d]\tturnaround= %9d\truns= %3d\tturn= %d\n", i, pt[i].turnaround[8], pt[i].runs, pt[i].turn); - } - - printf("\ntime= %5d\tthroughput= %9f\tcompleted_jobs= %d\n\n\n", end_t.tv_sec - start_time.tv_sec, throughput, completed_jobs); - - last = turn; - turn = 0; - for (i = 1; i < PROCESS_COUNT; i++) { - turn = ((pt[turn].turnaround[8] < pt[i].turnaround[8]) && (turn != last)) ? turn : i; - } - - } - } -} diff --git a/search/.gitignore b/search/.gitignore deleted file mode 100644 index 7523492..0000000 --- a/search/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -*test* -pages -index.dat -indexer -search -*.swp diff --git a/search/README.rst b/search/README.rst deleted file mode 100644 index e1d14fb..0000000 --- a/search/README.rst +++ /dev/null @@ -1,19 +0,0 @@ -============= -Search Engine -============= - -Setup -===== -In order for search.go to use the index package the directory "index" -must by copied (or linked) into a directory "search" that is in your -GOPATH. - -About -===== -Search Engine for web science class. - -See assign.rst for assignment details. - -Authors -======= -- Tucker Evans diff --git a/search/assign.rst b/search/assign.rst deleted file mode 100644 index 66e537e..0000000 --- a/search/assign.rst +++ /dev/null @@ -1,213 +0,0 @@ -======================== -Project 2: Search Engine -======================== - -**CS2621– Web Science** - -*100 points* - -You are to create a web search engine that works at the command line. 
-To do this, you will write two Python scripts, indexer.py and -search.py. - -Indexer -======= - -Indexer.py should do the following: - -1. After performing a crawl (using your other Python script), read all - the HTML files that were stored in the “pages” directory. For each - document, extract the title and the text from the body of the page - (read the Beautiful Soup documentation to find out how). Beautiful - Soup will include the text of the page in the content of the page, - and that is OK. Beautiful Soup may also break on some pages and - include HTML as text, but we will not worry about these - exceptions or bugs. - -2. All text should be converted to lowercase and non-alphanumeric - characters should be ignored. So “123-456” would become “123” and - “456”, and “joe@yahoo.com” would become “joe”, “yahoo”, “com”. - Ignore the following stop words: a, an, and, are, as, at, be, by, - for, from, has, he, in, is, it, its, of, on, that, the, to, was, - were, will, with. Do not perform stemming. - -3. A single inverted index should be created for the document corpus - which maintains the document ID (numbered 1…n in order of the pages - found in the “pages” directory), a 1 or 0 if the text is found in - the title, and the term frequency from the body (normalized by the - total number of tokens in the document after removing stop words). - -4. After indexer.py has finished indexing all the web pages, it should - output the index to index.dat which looks likethis: - -:: - - arkansas - 6 0 0.022 - model - 1 0 0.309 - 3 0 0.015 - 5 1 0.001 - tuesday - 2 0 0.082 - white - 2 1 0.018 - etc… - -.. note :: - The indexed words are alphabetized, and there are 3 spaces before - sets of three numbers (each separated by a single space) which are: - doc ID, title (0 or 1), and normalized body TF (rounded to 3 decimal - places). For example, the term white was found only in document 2; - it was somewhere in the title and made up 1.8% of all the words in - the document. - -5. It may take some time for your program to run, so you should output - information about the program’s status as it indexes the crawled - pages. Outputting what file is being worked on would be helpful to - the user who is waiting for the program to finish its work. - -Search -====== - -After the index is written to index.dat, the search.py script will -allow the user to search the corpus for specific words. Here is how -it should operate: - -1. First, read the search phrase at the command line. Examples: - - .. code :: bash - - $ search.py bisons - $ search.py "landmark college" - -If no command line argument is supplied, the program should tell the -user a search term is required and terminate. Ignore any command-line -arguments after the first. - -2. Next, the program should read the index from index.dat into memory. - Note that you may want to use similar data structures used in - indexer.py, so you should write your programs in a way where you - share code without having redundant code in each script. (It’s OK - to introduce new .py files to your project.) - -3. For simplicity, all queries will be assumed to use boolean ANDs, - and we will not implement phrase search. For example, the query - landmark college should generate a boolean search for landmark AND - college, so only documents containing both terms should be - considered amatch. - -4. Remove any stop words from the query as was done when indexing the - documents. - -5. 
After determining which documents match the search terms, calculate - the relevancy score for each document: relevancy score = 0.9 * body - TF + 0.1 * title score Do this for each term, and compute the - average relevancy score for all terms. So if the search was for - landmark college, you would compute the score for landmark and the - score for college and compute the average to determine the overall - relevancy score. - -6. The total number of results should first be displayed. Then display - every document ID and score (out to 3 decimal places) ordered by - score, and number the results. Example: - -.. code:: bash - - Results: 4 - 1. docID, 3, score, 0.830 - 2. docID, 1, score, 0.814 - 3. docID, 5, score, 0.350 - 4. docID, 8, score, 0.108 - -**Bonus:** You can receive 5 bonus points by implementing phrase search. -So when the user searches for “landmark college”, assume they want -only documents with that exact phrase. To accomplish this, you will -need to store the positions of the terms that are stored in the -inverted index. Then use those positions to ensure the phrase matches -successive positions. - - -Zip your entire project directory and submit it -to Canvas before it is due. Make sure your output matches the -specifications precisely to avoid losing any points. If you use any -code you find in the Web, you must document the source in your -program. - -Test Data -========= - -*a.html* - -.. code:: html - - cool!!! test!!! - - this 123-456. - - -*b.html* - -.. code:: html - - - - Go Patriots! - - - And another test and test! - - - -*c.html* - -.. code:: html - - - This is a test. - - -*Inverted index:* - -.. code:: - - 123 - a 0 0.200 - 456 - a 0 0.200 - another - b 0 0.200 - cool - a 1 0.200 - patriots - b 1 0.200 - go - b 1 0.200 - test - a 1 0.200 - c 0 0.500 - b 0 0.400 - this - a 0 0.200 - c 0 0.500 - -Search for "test this" results in the following: - -:: - - Results: 2 - 1. docID c, score 0.450 - 2. docID a, score 0.230 - -Search for "test patriots go" results in the following: - -:: - - Results: 1 - 1. 
docID b, score 0.310 - -Search for "cool patriots" results in the following: - -:: - - Results: 0 diff --git a/search/index/index.go b/search/index/index.go deleted file mode 100644 index 5d8ab65..0000000 --- a/search/index/index.go +++ /dev/null @@ -1,165 +0,0 @@ -package index - -import "fmt" -import "os" -import "io" -import "bufio" -import "sort" -import "errors" -import "strings" -import "strconv" - -/* TODO - - - Implement Forward Creation - - Implement Inverted from Forward - - Switch Indexer.go over to this package - -/********* - * Types * - *********/ - -type F_info struct { - Word string; - In_title bool; - Freq float64; -}; - -type I_info struct { - Doc string; - In_title bool; - Freq float64; -}; - -type F_entry struct{ - This *F_info; - Next *F_entry; -}; - -type I_entry struct{ - This *I_info; - Next *I_entry; -}; - -type F_index map[string]*F_entry; -type I_index map[string]*I_entry; - -type sortInverted struct{ - w string; - root *I_entry; -}; - - -/*************************** - * Forward Index Funcitons * - ***************************/ - -func NewForwardEntryStrings(text, title []string) (*F_entry, error) { - return nil, errors.New("not implemented"); -} - -/**************************** - * Inverted Index Functions * - ****************************/ - -func new_I_info() *I_info{ - return &I_info{"", false, 0.0}; -} - -func NewInvertedIndexFromFile(fname string) (I_index, error) { - var fd *os.File; - var br *bufio.Reader; - var err error; - var buf []byte; - var tmp *I_info; - var cur *I_entry; - var index I_index; - var word string - var info []string; - - fd, err = os.Open(fname); - if err != nil { - return nil, err; - } - - br = bufio.NewReader(fd); - if br == nil { - return nil, errors.New("Could not initialize reader"); - } - - index = make(I_index); - - for buf, err = br.ReadBytes('\n'); err != io.EOF; buf, err = br.ReadBytes('\n'){ - tmp = new_I_info(); - if err != nil { - return nil, err; - } - if buf[0] != '\t' { - word = strings.TrimSpace(string(buf)); - } else { - info = strings.Fields(string(buf)); - tmp.Doc = info[0]; - tmp.In_title = (info[1] == "1"); - tmp.Freq, _ = strconv.ParseFloat(info[2], 32); - if (index[word] == nil) { - index[word] = &I_entry{This: tmp, Next: nil}; - } else { - cur = index[word]; - for cur.Next != nil { - cur = cur.Next; - } - cur.Next = &I_entry{This: tmp, Next: nil}; - } - } - } - - return index, nil; -} - -func NewInvertedFromForward(f F_index) (I_index, error) { - return nil, errors.New("not implemented"); - -} - -func (x I_index) PrintToFile(fd *os.File) error{ - var i int; - var cur *I_entry; - var index []sortInverted; - - index = x.sortIndex(); - - for i = 0; i < len(index); i++ { - fmt.Fprintf(fd, "%s\n", index[i].w); - for cur = index[i].root; cur != nil; cur = cur.Next { - fmt.Fprintf(fd, "\t%s %d %.3f\n", cur.This.Doc, toInt(cur.This.In_title), cur.This.Freq); - } - } - return nil; -} - -func toInt(t bool) int{ - if (t){ - return 1; - } - return 0; -} - -func (unsort I_index) sortIndex() []sortInverted { - var i int; - var sorted []sortInverted; - - sorted = make([]sortInverted, len(unsort)); - - i = 0; - for k, v := range unsort { - sorted[i].w = k; - sorted[i].root = v; - i++; - } - - sort.Slice(sorted, func(i, j int) bool { - return sorted[i].w < sorted[j].w; - }); - - return sorted -} diff --git a/search/indexer.go b/search/indexer.go deleted file mode 100644 index d95f126..0000000 --- a/search/indexer.go +++ /dev/null @@ -1,402 +0,0 @@ -package main - -import "os" -import "sort" -import "golang.org/x/net/html" 
-import "log" -import "fmt" -import "github.com/PuerkitoBio/goquery" -import "github.com/kennygrant/sanitize" -import "strings" -import "flag" -import "errors" -import "regexp" - -type document struct { - fname string; - title []string; - text []string; - length int; -} - -type index struct { - doc *document; - title bool; - freq int; -} - -type wordSort struct { - w string; - root *wordList; -} - -type wordList struct { - this *index - next *wordList -} - -var r, nonAN *regexp.Regexp; -var stopWords []*regexp.Regexp; - - -func newDocument() *document { - return &document{"" , nil, nil, 0}; -} - -func RemoveNode(r, rn *html.Node) { - var found bool; - var n, item *html.Node; - var nodes map[int]*html.Node; - var i, j int; - - found = false; - nodes = make(map[int]*html.Node); - - for n = r.FirstChild; n != nil; n = n.NextSibling { - if n == rn { - found = true; - n.Parent.RemoveChild(n); - } - - nodes[i] = n; - i++; - } - - if !found { - for j = 0; j < i; j++ { - item = nodes[j]; - RemoveNode(item, rn); - } - } -} -func RemoveTag(doc *goquery.Selection, tag string) { - doc.Find(tag).Each(func(i int, s *goquery.Selection) { - RemoveNode(doc.Get(0), s.Get(0)); - }); -} - -func logReg(h []byte) []byte { - log.Printf("RegExp: %s", h); - return h; -} - -func parseDoc(fd *os.File, f_info os.FileInfo) (*document, error) { - var err error; - var text, t_text string; - var doc *goquery.Document; - var body, title *goquery.Selection; - var r_doc *document; - var i int; - - doc, err = goquery.NewDocumentFromReader(fd); - if err != nil { - log.Printf("goquery error: %s\n", err); - return nil, errors.New("Can't create goquery documnt"); - } - - body = doc.Find("body"); - RemoveTag(body, "script"); - RemoveTag(body, "noscript"); - - title = doc.Find("title"); - - //TODO add error detection - text, err = body.Html(); - t_text, err = title.Html(); - - - text = r.ReplaceAllString(text, "> <"); - t_text = r.ReplaceAllString(t_text, "> <"); - - text = sanitize.HTML(text); - t_text = sanitize.HTML(t_text); - - text = strings.ToLower(text); - t_text = strings.ToLower(t_text); - - text = nonAN.ReplaceAllString(text, " "); - t_text = nonAN.ReplaceAllString(t_text, " "); - - - for i = 0; i < len(stopWords); i++ { - text = stopWords[i].ReplaceAllString(text, " "); - t_text = stopWords[i].ReplaceAllString(t_text, " "); - } - r_doc = newDocument(); - - r_doc.fname = f_info.Name(); - r_doc.text = strings.Fields(text); - r_doc.title = strings.Fields(t_text); - r_doc.length = len(r_doc.text) + len(r_doc.title); - - return r_doc, nil; -} -func boolToInt(t bool) int { - if t { - return 1; - } - return 0; -} - -func printIndex(words []wordSort, fd *os.File) { - var i int; - var cur *wordList; - var fname string; - var t int; - var freq float64; - - for i = 0; i < len(words); i++ { - fmt.Fprintf(fd, "%s\n", words[i].w); - for cur = words[i].root; cur != nil; cur = cur.next { - fname = cur.this.doc.fname; - t = boolToInt(cur.this.title); - freq = float64(cur.this.freq) / float64(cur.this.doc.length); - - fmt.Fprintf(fd,"\t%s %d %.3f\n", fname, t, freq); - } - } -} - -func init() { - var err error; - log.SetOutput(os.Stderr); - r, err = regexp.Compile("><"); - if err != nil { - panic(err); - } - nonAN, err = regexp.Compile("[^a-zA-Z0-9]+"); - if err != nil { - panic(err); - } - //TODO add func to read in stop words from a file; - stopWords = make([]*regexp.Regexp, 26) - if err != nil { - panic(err); - } - stopWords[0], err = regexp.Compile("\\W+and\\W+"); - if err != nil { - panic(err); - } - stopWords[1], err = 
regexp.Compile("\\W+a\\W+"); - if err != nil { - panic(err); - } - stopWords[2], err = regexp.Compile("\\W+an\\W+"); - if err != nil { - panic(err); - } - stopWords[3], err = regexp.Compile("\\W+and\\W+"); - if err != nil { - panic(err); - } - stopWords[4], err = regexp.Compile("\\W+are\\W+"); - if err != nil { - panic(err); - } - stopWords[5], err = regexp.Compile("\\W+as\\W+"); - if err != nil { - panic(err); - } - stopWords[6], err = regexp.Compile("\\W+at\\W+"); - if err != nil { - panic(err); - } - stopWords[7], err = regexp.Compile("\\W+be\\W+"); - if err != nil { - panic(err); - } - stopWords[8], err = regexp.Compile("\\W+by\\W+"); - if err != nil { - panic(err); - } - stopWords[9], err = regexp.Compile("\\W+for\\W+"); - if err != nil { - panic(err); - } - stopWords[10], err = regexp.Compile("\\W+from\\W+"); - if err != nil { - panic(err); - } - stopWords[11], err = regexp.Compile("\\W+has\\W+"); - if err != nil { - panic(err); - } - stopWords[12], err = regexp.Compile("\\W+he\\W+"); - if err != nil { - panic(err); - } - stopWords[13], err = regexp.Compile("\\W+in\\W+"); - if err != nil { - panic(err); - } - stopWords[14], err = regexp.Compile("\\W+is\\W+"); - if err != nil { - panic(err); - } - stopWords[15], err = regexp.Compile("\\W+it\\W+"); - if err != nil { - panic(err); - } - stopWords[16], err = regexp.Compile("\\W+its\\W+"); - if err != nil { - panic(err); - } - stopWords[17], err = regexp.Compile("\\W+of\\W+"); - if err != nil { - panic(err); - } - stopWords[18], err = regexp.Compile("\\W+on\\W+"); - if err != nil { - panic(err); - } - stopWords[19], err = regexp.Compile("\\W+that\\W+"); - if err != nil { - panic(err); - } - stopWords[20], err = regexp.Compile("\\W+the\\W+"); - if err != nil { - panic(err); - } - stopWords[21], err = regexp.Compile("\\W+to\\W+"); - if err != nil { - panic(err); - } - stopWords[22], err = regexp.Compile("\\W+was\\W+"); - if err != nil { - panic(err); - } - stopWords[23], err = regexp.Compile("\\W+were\\W+"); - if err != nil { - panic(err); - } - stopWords[24], err = regexp.Compile("\\W+will\\W+"); - if err != nil { - panic(err); - } - stopWords[25], err = regexp.Compile("\\W+with\\W+"); - if err != nil { - panic(err); - } -} - -func main() { - // var words map[string]index - var p_dir, w, fname string; - var err error; - var i, j int; - var words map[string]*wordList; - var cur *wordList; - var tmp *index; - var sorted []wordSort; - - var files []os.FileInfo; - var dir, fd *os.File; - var dir_info, fd_info os.FileInfo; - var dir_mode os.FileMode; - - var doc *document; - - flag.StringVar(&p_dir, "d", "./pages", "pages directory"); - - flag.Parse(); - - words = make(map[string]*wordList); - - dir, err = os.Open(p_dir); - if err != nil { - log.Printf("Error accessing \"%s\":\t%s\n", p_dir, err); - os.Exit(1); - } - - dir_info, err = dir.Stat(); - dir_mode = dir_info.Mode(); - - if !dir_mode.IsDir() { - log.Printf("\"%s\" is not a directory\n", p_dir); - os.Exit(1); - } - - files, err = dir.Readdir(0); - if err != nil { - log.Printf("Error reading %s\n", p_dir); - os.Exit(1); - } - - for i = 0; i < len(files); i++ { - fd, err = os.Open(fmt.Sprintf("%s/%s", dir_info.Name(), files[i].Name())); - fd_info, err = fd.Stat(); - if err != nil { - log.Printf("Error getting info\n"); - os.Exit(1); - } - fname = fd_info.Name(); - - if err != nil { - log.Printf("Error reading %s/%s\n", dir_info.Name(), files[i].Name()); - } else { - fmt.Printf("Indexing %s...\n", fname); - doc, err = parseDoc(fd, fd_info); - if err != nil { - log.Printf("Error parsing 
%s/%s\n", dir_info.Name(), files[i].Name()); - } else { - /* Text */ - for j = 0; j < len(doc.text); j++ { - w = strings.ToLower(doc.text[j]); - - if words[w] == nil{ - tmp = &index{doc: doc, title: false, freq: 0}; - words[w] = &wordList{this: tmp, next: nil}; - } - - for cur = words[w];cur.next != nil && cur.this.doc.fname != fname; cur = cur.next{} - - if cur.this.doc.fname == fname { - cur.this.freq++ - } else if cur.next == nil { - tmp = &index{doc: doc, title: false, freq: 1}; - cur.next = &wordList{this: tmp, next: nil}; - } else { - panic(fmt.Sprintf("%v", cur)); - } - } - /* Title */ - for j = 0; j < len(doc.title); j++ { - w = strings.ToLower(doc.title[j]); - - if words[w] == nil{ - tmp = &index{doc: doc, title: true, freq: 0}; - words[w] = &wordList{this: tmp, next: nil}; - } - - for cur = words[w];cur.next != nil && cur.this.doc.fname != fname; cur = cur.next{} - - if cur.this.doc.fname == fname { - cur.this.title = true; - cur.this.freq++; - } else if cur.next == nil { - tmp = &index{doc: doc, title: true, freq: 1}; - cur.next = &wordList{this: tmp, next: nil}; - } else { - panic(fmt.Sprintf("%v", cur)); - } - } - } - } - fd.Close(); - } - sorted = make([]wordSort, len(words)); - i = 0; - for k,v := range words { - sorted[i].w = k; - sorted[i].root = v; - i++; - } - - sort.Slice(sorted, func(i, j int) bool { - return sorted[i].w < sorted[j].w; - }); - - fd,_ = os.Create("index.dat"); - printIndex(sorted, fd); - fd.Close(); -} diff --git a/search/search.go b/search/search.go deleted file mode 100644 index c144055..0000000 --- a/search/search.go +++ /dev/null @@ -1,144 +0,0 @@ -/************************************************ - * README * - * In order for search/index to be accessible * - * you must link this folder (search) into your * - * GOPATH * - ************************************************/ - - -package main - -import "search/index" -import "os" -import "fmt" -import "sort" -import "flag" -import "strings" - -type res struct { - doc string; - score float64; -}; - -func main() { - var init_index, sIndex index.I_index; - var tmp, results, root *index.I_entry; - var tmp_score float64; - var scores map[string]map[string]float64; // scores[doc][word] == score - var i,j int; - var searchBool, perWord, docAdded map[string]bool; //map[doc]bool - var resultSort []res; - var err error; - var fname, s string; - var search []string; - - flag.StringVar(&fname, "f", "./index.dat", "Index file"); - flag.StringVar(&s, "s", "" , "Search phrase"); - - flag.Parse(); - if len(s) == 0 { - fmt.Printf("Usage: search -s \"search phrase\" [-f index_file]"); - os.Exit(1); - } else { - search = strings.Fields(s); - } - - scores = make(map[string]map[string]float64); - searchBool = make(map[string]bool); - perWord = make(map[string]bool); - docAdded = make(map[string]bool); - - - sIndex = make(index.I_index); - - - - init_index, err = index.NewInvertedIndexFromFile(fname); - if err != nil { - panic(err) - } - for i = 0; i < len(search); i++ { - sIndex[search[i]] = init_index[search[i]] - } - - for _, v := range sIndex { - for tmp = v; tmp != nil; tmp = tmp.Next { - searchBool[tmp.This.Doc] = true; - scores[tmp.This.Doc] = make(map[string]float64); - } - } - - for _, v := range sIndex { - for tmp = v; tmp != nil; tmp = tmp.Next { - perWord[tmp.This.Doc] = true; - } - for d := range searchBool { - if !perWord[d] { - searchBool[d] = false; - } - } - perWord = make(map[string]bool); - } - - for k, v := range sIndex { - for tmp = v; tmp != nil; tmp = tmp.Next { - if searchBool[tmp.This.Doc] { - if 
tmp.This.In_title { - tmp_score = 1.0; - } else { - tmp_score = 0.0; - } - - scores[tmp.This.Doc][k] = (0.9 * tmp.This.Freq) + (0.1 * tmp_score); - } - } - - } - - i = 0; - results = &index.I_entry{nil, nil} - root = &index.I_entry{nil, nil}; - results.Next = root; - - j = 0; - - for _ ,v := range sIndex { - for tmp = v; tmp != nil; tmp = tmp.Next { - if (searchBool[tmp.This.Doc]) { - root.This = tmp.This; - docAdded[root.This.Doc] = false; - root.Next = &index.I_entry{nil, nil}; - root = root.Next; - j++ - } - } - } - - resultSort = make([]res, j); - - i = 0; - for root = results.Next; root.Next != nil; root = root.Next { - if (!docAdded[root.This.Doc]) { - j = 0; - tmp_score = 0; - for _ ,v := range scores[root.This.Doc] { - tmp_score += v; - j++; - } - tmp_score /= float64(j); - resultSort[i] = res{root.This.Doc, tmp_score}; - docAdded[root.This.Doc] = true; - i++; - } - } - resultSort = resultSort[:i]; - - sort.Slice(resultSort, func(i, j int) bool { - return resultSort[i].score > resultSort[j].score; - }); - - fmt.Printf("Results: %d\n", len(resultSort)); - for i = 0; i < len(resultSort); i++ { - fmt.Printf("\t%d. Doc: %s, Score: %.3f\n", i, resultSort[i].doc, resultSort[i].score); - } -} diff --git a/sync/assign.rst b/sync/assign.rst deleted file mode 100644 index ea4a566..0000000 --- a/sync/assign.rst +++ /dev/null @@ -1,41 +0,0 @@ -================ -Syncronization 1 -================ - -Write a program that uses semaphores to implement a readers/writers solution. Your program should: - - - be written in C and use the standard kernel IPC mechanisms (semget,semop,shmget etc) - - be written as a single top level source file compiled as: gcc –o myprog myprog.c - - take two arguments on the command line: myprog NR NW where NR,NW are each integers specifying the number of reader/writer processes respectively - - use fork/exec to create the readers/writers - -The shared memory segment should be 16k bytes in size, with all bytes initialized to 0x30 - -Reader: - -.. code :: - - open a file (for append) named reader.N, where N is the reader number - while (1) { - for (i=0; i<16k; i++) { - read the next byte of the shared memory segment - write that byte to the file - flush the file - } - sleep a random number of seconds, between 0 and N inclusive - } - -Writer: - -.. 
code :: - - while (1) { - for (i=0; i<16k; i++) - shared memory segment[i] = N + 0x30; - sleep a random number of seconds between 0 and 2*N inclusive - } - -readers and writers should be mutually exclusive -multiple concurrent readers are allowed -writers have priority over readers -writers are mutually exclusive relative to each other diff --git a/sync/makefile b/sync/makefile deleted file mode 100644 index dd9c463..0000000 --- a/sync/makefile +++ /dev/null @@ -1,5 +0,0 @@ -sync: sync.c reader.c writer.c - cc -o sync sync.c - cc -o reader reader.c - cc -o writer writer.c - diff --git a/sync/reader.c b/sync/reader.c deleted file mode 100644 index 929eba1..0000000 --- a/sync/reader.c +++ /dev/null @@ -1,86 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define NSEM 3 -#define KEY 52 - -char *mem; - -void quit(signum) -int signum; -{ - shmdt(mem); - exit(1); -} - -int main(argc, argv) -int argc; -char **argv; -{ - int shmid, semid, i, pid, id; - char filename[50]; - FILE *fd; - struct sembuf sb; - - if (argc < 2) { - printf("usage: reader [id]\n"); - exit(1); - } - - id = atoi(argv[1]); - - - if ((shmid = shmget(52, 1<<14, IPC_CREAT | 0666)) == -1){ - perror("shmget: shmget failed"); - exit(1); - } - - if ((mem = shmat(shmid, NULL, 0)) == (char *) -1) { - perror("shmat"); - exit(1); - } - - if ((semid = semget(shmid, NSEM, 0)) == -1) { - perror("Rsemget: "); - exit(1); - } - - signal(SIGQUIT, quit); - - sprintf(filename, "reader.%d", id); - - fd = fopen(filename, "a"); - - if (!fd) { - perror("fopen: "); - exit(1); - } - srand(time(NULL)); - - while (1) { - sb.sem_num = 0; sb.sem_op = -1; sb.sem_flg = 0; - semop(semid, &sb, 1); - - for (i = 0; i < 1<<14; i++) { - fprintf(fd, "%c", *(mem + i)); - fflush(fd); - } - fprintf(fd, "\n"); - fflush(fd); - - sb.sem_op = 1; - semop(semid, &sb, 1); - - - sleep(rand() % (id + 1)); - } -} diff --git a/sync/sync.c b/sync/sync.c deleted file mode 100644 index 23d6ba1..0000000 --- a/sync/sync.c +++ /dev/null @@ -1,118 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -#define NSEM 3 - -union semun { - int val; - struct semid_ds *buf; - ushort *array; -}; - - -int shmid, semid; - -void quit(signum) -int signum; -{ - shmctl(shmid, IPC_RMID, NULL); - semctl(semid, IPC_RMID, 0); -} - - -int main(argc, argv) -int argc; -char **argv; -{ - int i, pid, n_read, n_write, w; - char *mem, **arg_r, **arg_w; - union semun semarg; - - if (argc < 2) { - printf("usage: sync [number readers] [number writers]\n"); - exit(1); - } - n_read = atoi(argv[1]); - n_write = atoi(argv[2]); - - if ((shmid = shmget(52, 1<<14, IPC_CREAT | 0666)) == -1){ - perror("shmget: shmget failed"); - exit(1); - } - - if ((mem = shmat(shmid, NULL, 0)) == (char *) -1) { - perror("shmat"); - exit(1); - } -printf("Sshmid: %x\n", shmid); - signal(SIGQUIT, quit); - - for (i = 0; i < 1<<14; i++) { - *(mem + i) = 0x30; - } - - if ((semid = semget(shmid, NSEM, 0666 | IPC_CREAT)) == -1) { - perror("Ssemget: "); - exit(1); - } - - semarg.val = 1; - for (i = 0; i < NSEM; i++) { - if ((semctl(semid, i, SETVAL, semarg)) == -1) { - perror("semctl: "); - exit(1); - } - } - - - arg_r = malloc(sizeof(char*) * 3); - arg_w = malloc(sizeof(char*) * 3); - *arg_r = malloc(sizeof(char) * 10); - *arg_w = malloc(sizeof(char) * 10); - - *(arg_r + 1) = malloc(sizeof(char) * 50); - *(arg_w + 1) = malloc(sizeof(char) * 50); - - *arg_r = "reader"; - *arg_w = "writer"; - - *(arg_r + 2) = NULL; - *(arg_w + 2) = NULL; - 
- for (i = 0; i < n_read; i++){ - sprintf(*(arg_r + 1), "%d", i); - if (pid = fork()) { - /* printf("starting reader %d...\n", i); */ - } else { - int ret = execv("./reader", arg_r); - printf("exec retern %d", ret); - } - } - - for (i = 0; i < n_write; i++) { - sprintf(*(arg_w + 1), "%d", i); - if (pid = fork()) { - /* printf("starting writer %d...\n", i); */ - } else { - execvp("./writer", arg_w); - } - - } - - shmdt(mem); -printf("sync done...\n"); -/* TODO - * why is this returning 8 - */ - for (i = 0; i < (n_write + n_read); i++) { - wait(&w); - printf("\nReturned with code:%d\n", WEXITSTATUS(w)); - } - quit(); -} diff --git a/sync/writer.c b/sync/writer.c deleted file mode 100644 index d38dd70..0000000 --- a/sync/writer.c +++ /dev/null @@ -1,76 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define NSEM 3 - -char *mem; - -void quit(signum) -int signum; -{ - shmdt(mem); - exit(1); -} - -int main(argc, argv) -int argc; -char **argv; -{ - int shmid, semid, i, pid, id; - char *mem; - struct sembuf sb; - - if (argc < 2) { - printf("usage: writer [id]\n"); - exit(1); - } - - id = atoi(argv[1]); - - - if ((shmid = shmget(52, 1<<14, IPC_CREAT | 0666)) == -1){ - perror("shmget: shmget failed"); - exit(1); - } - - if ((mem = shmat(shmid, NULL, 0)) == (char *) -1) { - perror("shmat"); - exit(1); - } -printf("Wshmid: %x\n", shmid); - - if ((semid = semget(shmid, NSEM, 0)) == -1) { - perror("Wsemget: "); - exit(1); - } - - signal(SIGQUIT, quit); - - srand(time(NULL)); - - while (1) { - rand() % id; - - sb.sem_num = 0; sb.sem_op = -1; sb.sem_flg = 0; - semop(semid, &sb, 1); - - for (i = 0; i < 1<<14; i++) { - mem[i]= 0x30 + id; - } - - sb.sem_op = 1; - semop(semid, &sb, 1); - - sleep(rand() % ((id * 2) + 1)); - } -} diff --git a/timing/client.c b/timing/client.c deleted file mode 100644 index 9a83a5b..0000000 --- a/timing/client.c +++ /dev/null @@ -1,133 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#define BUFFER_SIZE 1024 - -typedef struct number_val { - int val; - struct timeval tv; -} num; - -int sock_init(argv) -char **argv; -{ - struct sockaddr_in address, serv_addr; - int sock = 0, valread; - char buffer[1024] = {0}; - - if ((sock = socket(AF_INET, SOCK_STREAM, 0)) < 0) { - perror("Socket creation error\n"); - return -1; - } - - memset(&serv_addr, '0', sizeof(serv_addr)); - - serv_addr.sin_family = AF_INET; - serv_addr.sin_port = htons(atoi(argv[2])); - - if (inet_pton(AF_INET, argv[1], &serv_addr.sin_addr) <= 0) { - perror("Invalid address/Address not supported\n"); - return -1; - } - - if (connect(sock, (struct sockaddr *) &serv_addr, sizeof(serv_addr)) < 0) { - perror("Connection Failed"); - return -1; - } - return sock; -} - -int main(argc, argv) -int argc; -char **argv; -{ - int sock = 0, valread, start_ptr = -1, end_ptr = 0, qset, i; - double avg = 0, cnt; - char buffer[1024] = {0}; - num num_buffer[BUFFER_SIZE]; - fd_set rs, ws, es; - struct timeval now, *ww, tmp; - - sock = sock_init(argv); - - ww = NULL; - - printf("Connected, waiting for numbers...\n"); - while (1) { - FD_ZERO(&rs); FD_ZERO(&ws); FD_ZERO(&es); - FD_SET(sock, &rs); - - qset = select(sock + 1, &rs, (fd_set *) 0, (fd_set *) 0, ww); - - gettimeofday(&now, NULL); - now.tv_sec = now.tv_sec - 60; - - ww = &tmp; - tmp.tv_sec = 0; - tmp.tv_usec = 500000; - - if (FD_ISSET(sock, &rs)) { - valread = read(sock, buffer, 1024); - for (i = 0; i < valread; i+=2) { - if (end_ptr == BUFFER_SIZE) - end_ptr = 0; - if 
(end_ptr != start_ptr) { - gettimeofday(&(num_buffer[end_ptr].tv), NULL); - num_buffer[end_ptr].val = atoi(buffer + i); - end_ptr++; - } else { - printf("dropping number...\n"); - } - } - } - - if (start_ptr == -1) - start_ptr = 0; - - if (start_ptr < end_ptr) { - for (i = start_ptr; i < end_ptr; i++) { - if (num_buffer[i].tv.tv_sec <= now.tv_sec) - start_ptr = i + 1; - } - } else { - for (i = start_ptr; i < BUFFER_SIZE; i++) { - num_buffer[end_ptr].val = atoi(buffer); - end_ptr++; - num_buffer[end_ptr].val = atoi(buffer); - end_ptr++; - num_buffer[end_ptr].val = atoi(buffer); - end_ptr++; - if (num_buffer[i].tv.tv_sec <= now.tv_sec) - start_ptr = i + 1; - } - - for (i = 0; i < end_ptr; i++) { - if (num_buffer[i].tv.tv_sec <= now.tv_sec) - start_ptr = i + 1; - } - - start_ptr %= BUFFER_SIZE; - } - - avg = 0; - cnt = start_ptr > end_ptr ? (BUFFER_SIZE - start_ptr + end_ptr) : (end_ptr - start_ptr); - cnt = cnt == 0 ? 10 : cnt; - - if (start_ptr < end_ptr) { - for (i = start_ptr; i < end_ptr; i++) - avg += num_buffer[i].val / cnt; - } else { - for (i = start_ptr; i < BUFFER_SIZE; i++) - avg += num_buffer[i].val / cnt; - for (i = 0; i < end_ptr; i++) - avg += num_buffer[i].val /cnt; - } - - printf("avg: %10.5f\n", avg); - } -} diff --git a/webCrawler2/README.rst b/webCrawler2/README.rst deleted file mode 100644 index 1168fb9..0000000 --- a/webCrawler2/README.rst +++ /dev/null @@ -1,13 +0,0 @@ -=========== -Web Crawler -=========== - -Web crawler for Web Science class - -Dependencies -============ -- `GoQuery `_. - -Authors -======= -- Tucker Evans diff --git a/webCrawler2/crawler.go b/webCrawler2/crawler.go deleted file mode 100644 index 5c4dba6..0000000 --- a/webCrawler2/crawler.go +++ /dev/null @@ -1,164 +0,0 @@ -package main - -import "crypto/md5" -import "fmt" -import "github.com/PuerkitoBio/goquery" -import "log" -import "net/url" -import "os" -import "strconv" -import "strings" -import "sync" -import "sync/atomic" -import "time" - -type link struct { - u *url.URL - depth int -} - -var mutex *sync.Mutex -var Prev map[string]bool -var base string -var links_visited uint64 = 0 - -func validLink(s string) bool { - return true - //return (strings.HasSuffix(s, ".html") || strings.HasSuffix(s, "/") || strings.HasSuffix(s, "\\")) -} - -func addLinks(doc *goquery.Document, jobs chan link, current link, depth int, worker_id int) { - doc.Find("body a").Each(func(index int, item *goquery.Selection) { - link_s, _ := item.Attr("href") - - d := depth + 1 - - u, err := url.Parse(link_s) - if err != nil { - panic(err) - } - - if !u.IsAbs() { - u = current.u.ResolveReference(u) - } - if strings.Contains(u.String(), base) && validLink(u.String()) { - mutex.Lock() - if !Prev[u.String()] { - jobs <- link{u, d} - Prev[u.String()] = true - } - mutex.Unlock() - } - }) -} - -func consume(doc *goquery.Document, url link, worker_id int) { - f, _ := os.Create(fmt.Sprintf("./pages/%x", md5.Sum([]byte(url.u.String())))) - s, _ := doc.Html() - f.Write([]byte(s)) -} - -func worker(done chan bool, jobs chan link, depth int, id int, total uint64) { - for { - x := atomic.LoadUint64(&links_visited) - if x >= total { - done <- true - return - } - - atomic.AddUint64(&links_visited, 1) - select { - case j := <-jobs: - if j.depth < depth { - doc, err := goquery.NewDocument(j.u.String()) - if err != nil { - log.Print("Error Reading Document: " + j.u.String() + err.Error()) - break - } - - fmt.Printf("worker %d Working on %s...\n", id, j.u.String()) - - consume(doc, j, id) - addLinks(doc, jobs, j, j.depth, id) - } - case 
<-time.After(time.Second * 10): - fmt.Printf("Worker %d done\n", id) - done <- true - return - } - } -} - -func init() { - mutex = &sync.Mutex{} - Prev = make(map[string]bool) - var err error - - fi, err := os.Lstat("./pages"); - if err != nil { - fmt.Printf("INIT ERROR: %s\n", err); - } - - if (fi == nil) { - os.Mkdir("./pages", 0755); - } else if (fi.Mode().IsRegular()) { - panic("pages is not a valid directory\n") - } - -} - -func main() { - var d, w, b int - var t uint64 - - if len(os.Args) < 5 { - fmt.Printf("usage: crawler url depth max_links workers\n") - panic("test") - } - - base = strings.TrimPrefix(os.Args[1], "http://www.") - base = strings.TrimPrefix(base, "https://www.") - if base == os.Args[1] { - panic(base) - } - - d, _ = strconv.Atoi(os.Args[2]) - b, _ = (strconv.Atoi(os.Args[3])) - t = uint64(b) - b, _ = (strconv.Atoi(os.Args[3])) - t = uint64(b) - w, _ = strconv.Atoi(os.Args[4]) - - jobs := make(chan link, 1024*1024) - done := make(chan bool) - - u, err := url.Parse(os.Args[1]) - if err != nil { - panic(err) - } - - if !u.IsAbs() { - panic("Cannot start with relative url") - } - jobs <- link{u, 0} - - //send first job - - for i := 0; i < w; i++ { - go worker(done, jobs, d, i, t) - } - - for i := 0; i < w; { - select { - case <-done: - i++ - case <-time.After(1 * time.Second): - if len(jobs) == (1024 * 1024) { - i = w - } - } - } - - close(done) - close(jobs) -} -- cgit v1.1
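For reference, the relevancy scoring described in search/assign.rst above (0.9 * body TF + 0.1 * title score, averaged over the matched query terms) can be checked with a few lines of C. This is a hypothetical sketch, not code taken from search.go; ``struct posting`` and ``relevancy`` are invented names, and the example values are the ones given in the assignment's test data.

.. code:: c

    #include <stdio.h>

    /* One posting for a single query term in a single document:
     * in_title is 0 or 1, tf is the normalized body term frequency,
     * exactly as stored per line of index.dat.                      */
    struct posting {
        int    in_title;
        double tf;
    };

    /* relevancy = 0.9 * body TF + 0.1 * title score,
     * averaged over all query terms the document matched. */
    static double relevancy(const struct posting *terms, int nterms)
    {
        double sum = 0.0;
        int i;

        for (i = 0; i < nterms; i++)
            sum += 0.9 * terms[i].tf + 0.1 * terms[i].in_title;
        return sum / nterms;
    }

    int main(void)
    {
        /* query "test this" against test document a:
         * test -> (title=1, tf=0.200), this -> (title=0, tf=0.200) */
        struct posting doc_a[] = { {1, 0.200}, {0, 0.200} };
        /* and against test document c:
         * test -> (title=0, tf=0.500), this -> (title=0, tf=0.500) */
        struct posting doc_c[] = { {0, 0.500}, {0, 0.500} };

        printf("doc a: %.3f\n", relevancy(doc_a, 2));  /* 0.230 */
        printf("doc c: %.3f\n", relevancy(doc_c, 2));  /* 0.450 */
        return 0;
    }

These reproduce the expected results listed in the assignment (docID a, score 0.230 and docID c, score 0.450).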