Boa源码分析 – 4 – 转义处理

于 2012年04月26日 发布在 linux应用 跳到评论

build_needs_escape函数的目的是建立一个位图(bitmap),用来表示哪些字符需要转义。此函数在escape.c中,不过先到escape.h中看看。

#include "config.h"

/* Highest character number that can possibly be passed through un-escaped */
#define NEEDS_ESCAPE_BITS 128
/* The bitmap therefore holds 128 bits, one per character code 0..127. */
#ifndef NEEDS_ESCAPE_SHIFT
#define NEEDS_ESCAPE_SHIFT 5     /* 1 << 5 is 32 bits */
#endif

/* Number of bitmap bits stored in each element of _needs_escape. */
#define NEEDS_ESCAPE_WORD_LENGTH (1<<NEEDS_ESCAPE_SHIFT)

/* Index of the _needs_escape element that holds the bit for character c. */
#define NEEDS_ESCAPE_INDEX(c) ((c)>>NEEDS_ESCAPE_SHIFT)

/* Assume variable shift is fast, otherwise this could be a table lookup.
 *
 * (NEEDS_ESCAPE_WORD_LENGTH - 1) keeps the low NEEDS_ESCAPE_SHIFT bits of c,
 * i.e. the bit position inside the word; a single 1 bit is then shifted to
 * that position.  The shifted constant is 1UL rather than 1 so that the mask
 * has the same type as the bitmap elements: with a plain int, a position of
 * 31 would left-shift into the sign bit (undefined behaviour) and the
 * resulting negative value would sign-extend when combined with an
 * unsigned long word.
 */
#define NEEDS_ESCAPE_MASK(c)  (1UL<<((c)&(NEEDS_ESCAPE_WORD_LENGTH - 1)))

/* Newer compilers could use an inline function.
 * This macro works great, as long as you pass unsigned int or unsigned char.
 *
 * Evaluates to non-zero when c must be escaped: either c is outside the
 * range covered by the bitmap, or its bit in _needs_escape is set.
 * Note that c is evaluated more than once.
 */
#define needs_escape(c) ((c)>=NEEDS_ESCAPE_BITS || (_needs_escape[NEEDS_ESCAPE_INDEX(c)]&NEEDS_ESCAPE_MASK(c)))

/* Bitmap storage, rounded up to a whole number of words. */
extern unsigned long _needs_escape[(NEEDS_ESCAPE_BITS+NEEDS_ESCAPE_WORD_LENGTH-1)/NEEDS_ESCAPE_WORD_LENGTH];
/* Fills in _needs_escape. */
void build_needs_escape(void);


escape.h中的东西让我看了很久才看懂。之前看programming pearls时也实现了个bitmap,在这里

然后来看看escape.c

 


/* Bitmap consulted by the needs_escape() macro and filled in by
 * build_needs_escape(): a set bit means the corresponding character must be
 * escaped.  The element count is NEEDS_ESCAPE_BITS / NEEDS_ESCAPE_WORD_LENGTH
 * rounded up, and must stay identical to the extern declaration in escape.h.
 */
unsigned long _needs_escape[(NEEDS_ESCAPE_BITS+NEEDS_ESCAPE_WORD_LENGTH-1)/NEEDS_ESCAPE_WORD_LENGTH];

/*
 * build_needs_escape - populate the _needs_escape bitmap.
 *
 * Every bit is first set ("must be escaped"); the bit of each character
 * listed in special[] is then cleared so that needs_escape() reports it as
 * safe to pass through unchanged.
 *
 * Before touching the table, the function checks that an unsigned long (the
 * element type of _needs_escape) really has at least
 * NEEDS_ESCAPE_WORD_LENGTH bits.  If it does not, a diagnostic is written to
 * stderr and the process exits with status 1.
 */
void build_needs_escape(void)
{
    /* Characters that do NOT need escaping. */
    const unsigned char special[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789"
        "-_.!~*'():@&=+$,/?";
    /* 21 Mar 2002 - jnelson - confirm with Apache 1.3.23 that '?'
     * is safe to leave unescaped.
     */
    unsigned long probe = 1; /* same type as the bitmap words */
    int bits = 0;            /* int so that it matches the %d below */
    unsigned int i;

    /* Count the bit positions available in an unsigned long by shifting a
     * single 1 bit left until it falls off the top.
     */
    while (probe != 0) {
        probe <<= 1;
        bits++;
    }

    if (bits < NEEDS_ESCAPE_WORD_LENGTH) {
        /* A bitmap word cannot hold NEEDS_ESCAPE_WORD_LENGTH bits. */
        fprintf(stderr,
                "NEEDS_ESCAPE_SHIFT configuration error -- "
                "%d should be <= log2(%d)\n",
                NEEDS_ESCAPE_SHIFT, bits);
        exit(1);
    }
    /* bits >= 2*NEEDS_ESCAPE_WORD_LENGTH means NEEDS_ESCAPE_SHIFT could be
     * raised (configuration suboptimal); that is not an error, so nothing
     * is done about it here.
     */

    /* Default: every character needs escaping. */
    memset(_needs_escape, ~0, sizeof(_needs_escape));

    for (i = 0; i < sizeof(special) - 1; ++i) {
        unsigned int c = special[i];

        if (c < NEEDS_ESCAPE_BITS) {
            /* Clear the bit: this character passes through un-escaped. */
            _needs_escape[NEEDS_ESCAPE_INDEX(c)] &= ~NEEDS_ESCAPE_MASK(c);
        }
        /* else: the character lies outside the bitmap and will be
         * needlessly escaped.
         */
    }
}
/* Compile with -DTEST to build this small driver and check the table by eye. */
#ifdef TEST
int main(void)
{
    int code = 0;

    build_needs_escape();

    /* The upper bound is inclusive, so code == NEEDS_ESCAPE_BITS also
     * exercises the out-of-range branch of needs_escape().
     */
    while (code <= NEEDS_ESCAPE_BITS) {
        if (needs_escape(code)) {
            printf("%3d needs escape.\n", code);
        }
        code++;
    }

    return 0;
}
#endif

好吧,就这么多了。感觉这一部分用C++的bitset会比较好……唉。

留下评论!

:wink: :twisted: :roll: :oops: :mrgreen: :lol: :idea: :evil: :cry: :arrow: :?: :-| :-x :-o :-P :-D :-? :) :( :!: 8-O 8)