Proxy_lab【Second_BC の BloG】

Proxy_lab

发表于 2022-10-01 | 更新于 2022-10-16

Lab

CSAPP

实现

一个web代理，并有多线程和缓存功能，所以一一来实现；

根据 write up 中所说，首先需要实现 HTTP/1.0 GET 请求的顺序代理：读取整个请求并解析请求（是否是有效HTTP请求），如果是则建立自己到适当 web服务器的连接，请求客户端指定对象，再将响应转发回客户端；注意：HTTP请求的每一行都以\r\n结束，整个请求头以一个只含\r\n的空行结尾；
- 具体要做到将url解析为三部分：host，后半url，HTTP版本；
- 请求头中包含ua，host，connection，proxy-connection；
- 请求端口无论在url中还是默认的都必须正确；
- 处理过早关闭的连接，需要忽略（或捕获）SIGPIPE信号，否则向已被对端关闭的连接写数据时进程会被终止；
实现多线程工作（生产者-消费者）；
实现缓存最近内存中使用的web对象（LRU策略）；
- 设置缓存的最大内存，以及单个对象的最大内存；

handout给出了tiny服务器的源码，只需要在这个基础上进行改装；

Tiny解析

main函数：

/*
 * main - entry point of the Tiny web server.
 *
 * Expects exactly one argument, the port to listen on. Connections are
 * accepted and served strictly one after another: each accepted descriptor
 * is handed to doit() and closed before the next Accept().
 */
int main(int argc, char **argv)
{
    struct sockaddr_storage peer_addr;
    socklen_t peer_len;
    char peer_host[MAXLINE], peer_port[MAXLINE];
    int listen_sock, conn_sock;

    /* The port number is the only accepted argument. */
    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(1);
    }

    /* Listening descriptor bound to the requested port. */
    listen_sock = Open_listenfd(argv[1]);

    for (;;) {
        /* Accept() overwrites the length, so reset it on every iteration. */
        peer_len = sizeof(peer_addr);
        conn_sock = Accept(listen_sock, (SA *)&peer_addr, &peer_len);

        /* Report who connected (host and port of the peer). */
        Getnameinfo((SA *)&peer_addr, peer_len, peer_host, MAXLINE,
                    peer_port, MAXLINE, 0);
        printf("Accepted connection from (%s, %s)\n", peer_host, peer_port);

        /* Serve the request, then release the connected descriptor. */
        doit(conn_sock);
        Close(conn_sock);
    }
}

doit函数：

/*
 * doit - handle one HTTP request/response transaction on descriptor fd.
 *
 * Reads the request line, answers anything other than GET with a 501,
 * consumes the request headers, and then serves the requested file either
 * as static content or by running it as a CGI program. Failures are
 * reported to the client through clienterror(). fd is not closed here.
 */
void doit(int fd)
{
    int is_static;
    struct stat sbuf;
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    char filename[MAXLINE], cgiargs[MAXLINE];
    rio_t rio;

    /* Read the request line; nothing to do if the client sent nothing. */
    Rio_readinitb(&rio, fd);
    if (!Rio_readlineb(&rio, buf, MAXLINE))
        return;
    printf("%s", buf);

    /*
     * Split the request line. method and uri are required: without this
     * check a malformed line would leave them uninitialized and the code
     * below would read indeterminate memory. version is optional and is
     * not used by this function.
     */
    version[0] = '\0';
    if (sscanf(buf, "%s %s %s", method, uri, version) < 2) {
        clienterror(fd, buf, "400", "Bad Request",
                    "Tiny could not parse the request line");
        return;
    }

    /* Only GET is implemented. */
    if (strcasecmp(method, "GET")) {
        clienterror(fd, method, "501", "Not Implemented",
                    "Tiny does not implement this method");
        return;
    }
    read_requesthdrs(&rio);    /* consume (and print) the request headers */

    /* Map the URI to a file name and CGI arguments. */
    is_static = parse_uri(uri, filename, cgiargs);
    if (stat(filename, &sbuf) < 0) {
        clienterror(fd, filename, "404", "Not found",
                    "Tiny couldn't find this file");
        return;
    }

    if (is_static) {
        /* Static content must be a regular file readable by its owner. */
        if (!(S_ISREG(sbuf.st_mode)) || !(S_IRUSR & sbuf.st_mode)) {
            clienterror(fd, filename, "403", "Forbidden",
                        "Tiny couldn't read the file");
            return;
        }
        serve_static(fd, filename, sbuf.st_size);
    }
    else {
        /* Dynamic content must be a regular file executable by its owner. */
        if (!(S_ISREG(sbuf.st_mode)) || !(S_IXUSR & sbuf.st_mode)) {
            clienterror(fd, filename, "403", "Forbidden",
                        "Tiny couldn't run the CGI program");
            return;
        }
        serve_dynamic(fd, filename, cgiargs);
    }
}

serve_static函数：

/*
 * serve_static - send a file to the client as an HTTP/1.0 200 response.
 *
 * fd        connected descriptor the response is written to
 * filename  path of the file to send; also used to pick the Content-type
 * filesize  number of bytes mapped and sent, reported as Content-length
 */
void serve_static(int fd, char *filename, int filesize)
{
    char mime[MAXLINE], line[MAXBUF];
    char *mapped;
    int file_fd;

    get_filetype(filename, mime);

    /* Status line and response headers, written one line at a time. */
    sprintf(line, "HTTP/1.0 200 OK\r\n");
    Rio_writen(fd, line, strlen(line));

    sprintf(line, "Server: Tiny Web Server\r\n");
    Rio_writen(fd, line, strlen(line));

    sprintf(line, "Content-length: %d\r\n", filesize);
    Rio_writen(fd, line, strlen(line));

    /* The extra \r\n is the blank line that ends the header section. */
    sprintf(line, "Content-type: %s\r\n\r\n", mime);
    Rio_writen(fd, line, strlen(line));

    /*
     * Response body: map the file read-only and privately, close the file
     * descriptor right away (the mapping is used after the close), write
     * the mapped bytes to the client, then unmap.
     */
    file_fd = Open(filename, O_RDONLY, 0);
    mapped = Mmap(0, filesize, PROT_READ, MAP_PRIVATE, file_fd, 0);
    Close(file_fd);
    Rio_writen(fd, mapped, filesize);
    Munmap(mapped, filesize);
}

serve_dynamic函数：

/*
 * serve_dynamic - run a CGI program and let it produce the response body.
 *
 * fd        connected descriptor the response is written to
 * filename  path of the CGI program to execute
 * cgiargs   query arguments, passed to the program via QUERY_STRING
 *
 * The parent writes the status line and the Server header; the child
 * writes everything else through its redirected standard output.
 */
void serve_dynamic(int fd, char *filename, char *cgiargs)
{
    char *no_args[] = { NULL };
    char line[MAXLINE];

    /* First part of the response, sent by the server itself. */
    sprintf(line, "HTTP/1.0 200 OK\r\n");
    Rio_writen(fd, line, strlen(line));

    sprintf(line, "Server: Tiny Web Server\r\n");
    Rio_writen(fd, line, strlen(line));

    if (Fork() == 0) {
        /* Child: expose the arguments to the CGI program ... */
        setenv("QUERY_STRING", cgiargs, 1);
        /* ... send its standard output to the client ... */
        Dup2(fd, STDOUT_FILENO);
        /* ... and replace this process with the CGI program. */
        Execve(filename, no_args, environ);
    }

    /* Parent: wait for the child and reap it. */
    Wait(NULL);
}

I . 顺序代理GET请求

writeup中的要求：

处理 HTTP/1.0 版本，如果遇到1.1，则需要将其作为1.0版本转发；
转发合法 HTTP 请求（实现中所示）；

头中的 ua 和两个 connection 都有给定的值：

"User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n"
"Proxy-Connection: close\r\n"
"Connection: close\r\n"

实际上要做的，就是将doit内的操作变为转发与回复，而不是单纯回响；

那么需要将发送的包改写给目标服务器，之后把目标服务器的回响写给发送者；

要看uri中是否有端口那就应该解析uri，但和上面解析是不一样的，上面是在看读取的文件是静态还是动态；

主函数和tiny一样，只是需要在 listen之前加一条：

signal(SIGPIPE, SIG_IGN);

新建三个全局变量：

// Results of parsing the request URI: written by parse_uri() and read when
// the upstream request is built and the upstream connection is opened.
// NOTE(review): these are plain globals; once doit() runs on several worker
// threads they are shared without any locking visible here - confirm.
char send_port[MAXLINE];   // upstream port, as a decimal string
char send_host[MAXLINE];   // upstream host name
char send_path[MAXLINE];   // path sent in the rewritten request line

doit：

/*
 * doit - proxy one HTTP GET transaction for the client connected on fd.
 *
 * Reads the client's request line, rewrites it as an HTTP/1.0 request for
 * the path extracted by parse_uri(), sends it to the origin server named in
 * send_host/send_port, and relays the server's response to the client.
 * Anything other than GET is answered with a 501. fd is not closed here.
 */
void doit(int fd)
{
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    char backbuf[MAXLINE], newhd[MAXLINE];
    char *send;
    rio_t rio, serverfd_rio;

    /* Read the request line; nothing to do if the client sent nothing. */
    Rio_readinitb(&rio, fd);
    if (!Rio_readlineb(&rio, buf, MAXLINE))
        return;
    printf("%s", buf);

    /*
     * Split the request line. method and uri are required: without this
     * check a malformed line would leave them uninitialized. version is
     * optional because the request is always forwarded as HTTP/1.0.
     */
    version[0] = '\0';
    if (sscanf(buf, "%s %s %s", method, uri, version) < 2) {
        clienterror(fd, buf, "400", "Bad Request",
                    "Proxy could not parse the request line");
        return;
    }

    /* Only GET is implemented. */
    if (strcasecmp(method, "GET")) {
        clienterror(fd, method, "501", "Not Implemented",
                    "Tiny does not implement this method");
        return;
    }
    read_requesthdrs(&rio);    /* consume (and print) the request headers */

    /* Split the URI into send_host, send_port and send_path. */
    parse_uri(uri);

    /*
     * Rewrite the request line. send_path can be as long as newhd itself,
     * so the write is bounded rather than trusting the URI length.
     */
    snprintf(newhd, sizeof newhd, "GET %s HTTP/1.0\r\n", send_path);
    send = built_message(newhd, &rio);

    /* Connect to the origin server. */
    int serverfd = Open_clientfd(send_host, send_port);
    if (serverfd < 0)
    {
        printf("connection failed\n");
        return;
    }

    Rio_readinitb(&serverfd_rio, serverfd);
    /* Forward the rewritten request. */
    Rio_writen(serverfd, send, strlen(send));

    /*
     * Relay the response. The byte count returned by the read is what gets
     * written, so embedded NUL bytes in the response are preserved. A signed
     * count with "> 0" also stops the loop on a read error instead of
     * treating -1 as a huge length.
     */
    ssize_t n;
    while ((n = Rio_readlineb(&serverfd_rio, backbuf, MAXLINE)) > 0)
    {
        printf("proxy received %d bytes,then send\n", (int)n);
        Rio_writen(fd, backbuf, n);
    }

    Close(serverfd);
}

两个神奇函数：

/*
 * parse_uri - split a request URI into send_host, send_port and send_path.
 *
 * Handles the absolute form "scheme://host[:port][/path]". The port
 * defaults to "80" and the path to "/" whenever they are absent, so values
 * left over from an earlier call are never reused for them. When the URI
 * contains no "//", only the path (and the default port) is set and
 * send_host is left untouched.
 *
 * uri is modified in place: it is cut at the first ':' or '/' that follows
 * "//". Callers that need the original text must copy it beforehand.
 */
void parse_uri(char *uri)
{
    char *authority;
    char *slash;
    char *colon;
    int port;

    /* Start from the defaults so no field keeps a stale value. */
    strcpy(send_port, "80");
    strcpy(send_path, "/");

    authority = strstr(uri, "//");
    if (authority == NULL) {
        /* No host part: everything from the first '/' on is the path. */
        slash = strchr(uri, '/');
        if (slash != NULL)
            snprintf(send_path, sizeof send_path, "%s", slash);
        return;
    }
    authority += 2;    /* skip the "//" */

    /*
     * Save the path first and cut it off, so that a ':' occurring inside
     * the path can never be mistaken for the host/port separator.
     */
    slash = strchr(authority, '/');
    if (slash != NULL) {
        snprintf(send_path, sizeof send_path, "%s", slash);
        *slash = '\0';
    }

    /* What remains is "host" or "host:port". */
    colon = strchr(authority, ':');
    if (colon != NULL) {
        *colon = '\0';
        /* Keep the default when the port is missing or out of range. */
        if (sscanf(colon + 1, "%d", &port) == 1 && port > 0 && port <= 65535)
            sprintf(send_port, "%d", port);
    }

    snprintf(send_host, sizeof send_host, "%s", authority);
}

/*
 * built_message - assemble the request that is forwarded to the origin server.
 *
 * getit  the rewritten request line, already terminated with \r\n
 * rp     buffered reader on the client connection
 *
 * Returns a NUL-terminated string made of the request line, the Host,
 * User-Agent, Connection and Proxy-Connection headers, the blank line that
 * ends the headers, and whatever can still be read from rp.
 *
 * The result lives in a per-thread static buffer: it stays valid after the
 * function returns (the caller does not free it) and is overwritten by the
 * next call made on the same thread. Output that does not fit in MAXLINE
 * bytes is truncated instead of overflowing.
 */
char *built_message( char *getit,rio_t *rp)
{
    static _Thread_local char msg[MAXLINE];
    int len;
    size_t room;
    ssize_t got;

    /* Request line plus the fixed headers, written in one bounded call. */
    len = snprintf(msg, sizeof msg,
                   "%s"
                   "Host: %s\r\n"
                   "User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:10.0.3) Gecko/20120305 Firefox/10.0.3\r\n"
                   "Connection: close\r\n"
                   "Proxy-Connection: close\r\n\r\n",
                   getit, send_host);
    if (len < 0) {
        msg[0] = '\0';
        return msg;
    }
    if ((size_t)len >= sizeof msg)
        len = (int)(sizeof msg - 1);    /* output was truncated */

    /*
     * Append what is left on the client connection, reading straight into
     * the free space and terminating with the byte count that was read.
     * NOTE(review): if the caller has already consumed the request headers
     * this read may wait until the client closes its side - confirm.
     */
    room = sizeof msg - 1 - (size_t)len;
    if (room > 0) {
        got = Rio_readnb(rp, msg + len, room);
        if (got < 0)
            got = 0;
        msg[(size_t)len + (size_t)got] = '\0';
    }
    return msg;
}

II . 多线程的并发

实现多线程使用消费者-生产者模型：

消费者和生产者共享一个有 n 个槽的有限缓冲区：生产者产生新的项目并插入缓冲区；消费者取出这些项目并使用；

因此两者对缓冲区的访问需要互斥，并且需要调度：缓冲区为空时消费者等待，缓冲区已满时生产者等待；

在这个实验里，生产者是主线程：它接受客户端的连接，并把已连接描述符插入缓冲区；消费者是各个工作线程：它们从缓冲区取出描述符，并为对应的客户端服务；

实现缓冲区：

/* Bounded buffer of int items, used with sbuf_insert() and sbuf_remove(). */
typedef struct {
    int *buf;          // buffer array
    int n;             // maximum number of slots
    int front;         // buf[(front+1)%n] is the first item
    int rear;          // buf[rear%n] is the last item
    sem_t mutex;       // protects accesses to buf; initialized to 1
    sem_t slots;       // counts free slots; initialized to n
    sem_t items;       // counts stored items; initialized to 0
} sbuf_t;

生产者（主线程）向缓冲区插入项目的函数：

/*
 * sbuf_insert - append item to the rear of the bounded buffer sp.
 *
 * Waits on sp->slots until a free slot exists, stores the item while
 * holding sp->mutex, then posts sp->items to announce the new item.
 */
void sbuf_insert(sbuf_t *sp, int item)
{
    /* Wait for a free slot. */
    P(&sp->slots);

    /* Exclusive access to the buffer while it is modified. */
    P(&sp->mutex);
    sp->rear++;
    sp->buf[sp->rear % sp->n] = item;
    V(&sp->mutex);

    /* One more item is available. */
    V(&sp->items);
}

消费者（工作线程）从缓冲区取出项目的函数：

/*
 * sbuf_remove - take the first item out of the bounded buffer sp.
 *
 * Waits on sp->items until an item exists, reads it while holding
 * sp->mutex, then posts sp->slots to announce the freed slot.
 * Returns the removed item.
 */
int sbuf_remove(sbuf_t *sp)
{
    int taken;

    /* Wait for an item. */
    P(&sp->items);

    /* Exclusive access to the buffer while it is modified. */
    P(&sp->mutex);
    sp->front++;
    taken = sp->buf[sp->front % sp->n];
    V(&sp->mutex);

    /* One more slot is free. */
    V(&sp->slots);

    return taken;
}

主函数（和tiny的main差不多）：

/*
 * main - entry point of the multi-threaded proxy.
 *
 * Expects exactly one argument, the port to listen on. Starts NTHREADS
 * worker threads, then loops forever accepting connections and queueing
 * each connected descriptor in sbuf for a worker to pick up.
 */
int main(int argc, char **argv)
{
    struct sockaddr_storage peer_addr;
    socklen_t peer_len;
    char peer_host[MAXLINE], peer_port[MAXLINE];
    int listen_sock, conn_sock;

    /* The port number is the only accepted argument. */
    if (argc != 2) {
        fprintf(stderr, "usage: %s <port>\n", argv[0]);
        exit(1);
    }

    /* Ignore SIGPIPE so writing to a closed connection does not end the process. */
    signal(SIGPIPE, SIG_IGN);

    /* Listening descriptor bound to the requested port. */
    listen_sock = Open_listenfd(argv[1]);

    /* Prepare the shared descriptor queue and start the workers. */
    sbuf_init(&sbuf, SBUFSIZE);
    for (int worker = 0; worker < NTHREADS; worker++) {
        Pthread_create(&tid, NULL, thread, NULL);
    }

    for (;;) {
        /* Accept() overwrites the length, so reset it on every iteration. */
        peer_len = sizeof(peer_addr);
        conn_sock = Accept(listen_sock, (SA *)&peer_addr, &peer_len);

        /* Hand the connection to the workers. */
        sbuf_insert(&sbuf, conn_sock);

        /* Report who connected (host and port of the peer). */
        Getnameinfo((SA *)&peer_addr, peer_len, peer_host, MAXLINE,
                    peer_port, MAXLINE, 0);
        printf("Accepted connection from (%s, %s)\n", peer_host, peer_port);
    }
}

线程执行函数：

/*
 * thread - body of a worker thread.
 *
 * Detaches itself, then loops forever: takes a connected descriptor from
 * sbuf, serves it with doit(), and closes it. vargp is unused.
 */
void *thread(void *vargp)
{
    /* Detached, so nobody has to join this thread. */
    Pthread_detach(pthread_self());

    for (;;) {
        /* Waits inside sbuf_remove() until a descriptor is queued. */
        int client = sbuf_remove(&sbuf);

        doit(client);
        Close(client);
    }
}

III . 缓存web对象

目的是为了让多次访问的web对象不用再连接服务器，直接响应；

这里会使用读者-写者模型，让线程从缓存中读和写：

只读的线程叫读者，只写的线程叫写者；读者可以和其他读者同时访问共享对象，而写者必须独占访问；

这个模型有两种情况：

读者优先，写者优先；

这里使用读优先：

/*
 * Shared state of the readers-writers protocol below.
 * The counter is named readcnt because that is the spelling reader() uses.
 */
int readcnt;       // number of readers currently between entry and exit
sem_t mutex, w;    // both initialized to 1: mutex protects readcnt, w is held
                   // while readers are active or while a writer is writing


/*
 * reader - reader side of the readers-writers protocol.
 *
 * The first reader to enter takes w, which keeps writers out; the last
 * reader to leave gives it back. mutex protects the reader count.
 */
void reader(void)
{
    for (;;) {
        /* Entry: the first reader locks writers out. */
        P(&mutex);
        if (++readcnt == 1)
            P(&w);
        V(&mutex);

        /* Critical section: reading happens here. */

        /* Exit: the last reader lets writers in again. */
        P(&mutex);
        if (--readcnt == 0)
            V(&w);
        V(&mutex);
    }
}

/*
 * writer - writer side of the readers-writers protocol.
 *
 * A writer holds w for the whole time it writes, which excludes both the
 * readers (the first reader also takes w) and every other writer.
 */
void writer(void)
{
    while (1) {
        P(&w);

        /* Critical section: writing happens here. */

        V(&w);
    }
}

设置缓存区：

/* One cache entry, with its own readers-writers state. */
typedef struct
{
    char obj[MAX_OBJECT_SIZE];  // cached response; doit() sends strlen(obj) bytes of it
    char uri[MAXLINE];          // presumably the request URI this entry is looked up by - confirm
    int LRU;                    // presumably the recency value used for LRU eviction - confirm
    int isEmpty;                // presumably non-zero while the entry is unused - confirm

    int read_cnt; // number of readers currently reading this entry
    sem_t w;      // taken by the first reader of this entry and released by the last
    sem_t mutex;  // protects read_cnt

} block;

/* The whole cache: a fixed array of MAX_CACHE entries. */
typedef struct
{
    block data[MAX_CACHE];  // the cache entries
    int num;                // presumably the number of entries in use - confirm
} Cache;

修改doit函数中的内容，得到请求后，判断uri是否在缓存中，不在就添加进去：

/*
 * doit - proxy one HTTP GET transaction for the client on fd, using the cache.
 *
 * If the requested URI is found by get_Cache(), the cached object is sent
 * to the client under that entry's reader lock and no upstream connection
 * is made. Otherwise the request is rewritten as HTTP/1.0, forwarded to the
 * origin server, relayed back to the client, and - when the whole response
 * is smaller than MAX_OBJECT_SIZE - handed to write_Cache(). fd is not
 * closed here.
 */
void doit(int fd)
{
    char buf[MAXLINE], method[MAXLINE], uri[MAXLINE], version[MAXLINE];
    char backbuf[MAXLINE], newhd[MAXLINE];
    char *send;
    char cache_tag[MAXLINE];
    rio_t rio, serverfd_rio;

    /* Read the request line; nothing to do if the client sent nothing. */
    Rio_readinitb(&rio, fd);
    if (!Rio_readlineb(&rio, buf, MAXLINE))
        return;
    printf("%s", buf);

    /*
     * Split the request line. method and uri are required: without this
     * check a malformed line would leave them uninitialized. version is
     * optional because the request is always forwarded as HTTP/1.0.
     */
    version[0] = '\0';
    if (sscanf(buf, "%s %s %s", method, uri, version) < 2) {
        clienterror(fd, buf, "400", "Bad Request",
                    "Proxy could not parse the request line");
        return;
    }

    /* parse_uri() cuts uri apart, so keep an intact copy as the cache key. */
    strcpy(cache_tag, uri);

    /* Only GET is implemented. */
    if (strcasecmp(method, "GET")) {
        clienterror(fd, method, "501", "Not Implemented",
                    "Tiny does not implement this method");
        return;
    }
    read_requesthdrs(&rio);    /* consume (and print) the request headers */

    /* Cache hit: answer from the cache and skip the origin server. */
    int i;
    if ((i = get_Cache(cache_tag)) != -1)
    {
        /* Reader entry: the first reader takes the entry's w semaphore. */
        P(&cache.data[i].mutex);
        cache.data[i].read_cnt++;
        if (cache.data[i].read_cnt == 1)
            P(&cache.data[i].w);
        V(&cache.data[i].mutex);

        /* The client's descriptor in this function is fd. */
        Rio_writen(fd, cache.data[i].obj, strlen(cache.data[i].obj));

        /* Reader exit: the last reader releases w. */
        P(&cache.data[i].mutex);
        cache.data[i].read_cnt--;
        if (cache.data[i].read_cnt == 0)
            V(&cache.data[i].w);
        V(&cache.data[i].mutex);
        return;
    }

    /* Split the URI into send_host, send_port and send_path. */
    parse_uri(uri);

    /*
     * Rewrite the request line. send_path can be as long as newhd itself,
     * so the write is bounded rather than trusting the URI length.
     */
    snprintf(newhd, sizeof newhd, "GET %s HTTP/1.0\r\n", send_path);
    send = built_message(newhd, &rio);

    /* Connect to the origin server. */
    int serverfd = Open_clientfd(send_host, send_port);
    if (serverfd < 0)
    {
        printf("connection failed\n");
        return;
    }

    Rio_readinitb(&serverfd_rio, serverfd);
    /* Forward the rewritten request. */
    Rio_writen(serverfd, send, strlen(send));

    char cache_buf[MAX_OBJECT_SIZE];
    size_t size_buf = 0;
    ssize_t n;

    cache_buf[0] = '\0';

    /*
     * Relay the response and keep a copy of it while it still fits. The
     * copy is taken from backbuf (the data just received) at the current
     * offset, and one byte is always left free for the terminator.
     */
    while ((n = Rio_readlineb(&serverfd_rio, backbuf, MAXLINE)) > 0)
    {
        if (size_buf + (size_t)n < MAX_OBJECT_SIZE)
            memcpy(cache_buf + size_buf, backbuf, (size_t)n);
        size_buf += (size_t)n;
        printf("proxy received %d bytes,then send\n", (int)n);
        Rio_writen(fd, backbuf, n);
    }

    Close(serverfd);

    /*
     * Store the object only if all of it fitted.
     * NOTE(review): the cache is read back with strlen(), so a response
     * containing NUL bytes would be served truncated from the cache -
     * confirm whether write_Cache() should also record the length.
     */
    if (size_buf < MAX_OBJECT_SIZE) {
        cache_buf[size_buf] = '\0';
        write_Cache(cache_tag, cache_buf);
    }
}

总结

虽然迷迷糊糊的，但跟着线程走了一遍，多多少少学会了更多的东西：比如信号量的运用，线程创建和运作方式，以及状态机和模型的特点；但这个lab确实感受到了难度，等往后学的深入再返回看的话应该还会有收获；

本文作者: Second_BC

本文链接: https://secondbc.github.io/SecondBC/2022/10/01/Proxy-lab/