对http协议格式的分析 #63

kimown · 2016-02-28T04:05:48Z

https://nodejs.org/en/about/

// Minimal HTTP server used to produce the response that these notes dissect.
const http = require('http');

// Address and port the server listens on.
const hostname = '127.0.0.1';
const port = 1337;

// Every request, whatever its method or URL, gets the same reply:
// status 200, a text/plain Content-Type header, and the body "Hello World\n".
http.createServer((req, res) => {
  res.writeHead(200, { 'Content-Type': 'text/plain' });
  // No Content-Length header is set here; the captured response in these notes
  // shows the body being sent with "Transfer-Encoding: chunked".
  res.end('Hello World\n');
}).listen(port, hostname, () => {
  // Runs once the server is listening.
  console.log(`Server running at http://${hostname}:${port}/`);
});

走socks看到报文内容，

{
  "type": "Buffer",
  "data":[72,84,84,80,47,49,46,49,32,50,48,48,32,79,75,13,10,67,111,110,116,101,110,116,45,84,121,112,101,58,32,116,101,120,116,47,112,108,97,105,110,13,10,68,97,116,101,58,32,83,117,110,44,32,50,56,32,70,101,98,32,50,48,49,54,32,48,51,58,53,51,58,49,51,32,71,77,84,13,10,67,111,110,110,101,99,116,105,111,110,58,32,107,101,101,112,45,97,108,105,118,101,13,10,84,114,97,110,115,102,101,114,45,69,110,99,111,100,105,110,103,58,32,99,104,117,110,107,101,100,13,10,13,10,99,13,10,72,101,108,108,111,32,87,111,114,108,100,10,13,10,48,13,10,13,10]
}

我们写一个程序分析下，这个涉及node buffer的有关二进制 unicode的处理，这个方面我还要加强下，还是有点了解的不通透。


// Raw bytes of the captured HTTP response, laid out one protocol line per row.
const a = [
    // "HTTP/1.1 200 OK\r\n"
    72,84,84,80,47,49,46,49,32,50,48,48,32,79,75,13,10,
    // "Content-Type: text/plain\r\n"
    67,111,110,116,101,110,116,45,84,121,112,101,58,32,116,101,120,116,47,112,108,97,105,110,13,10,
    // "Date: Sun, 28 Feb 2016 03:53:13 GMT\r\n"
    68,97,116,101,58,32,83,117,110,44,32,50,56,32,70,101,98,32,50,48,49,54,32,48,51,58,53,51,58,49,51,32,71,77,84,13,10,
    // "Connection: keep-alive\r\n"
    67,111,110,110,101,99,116,105,111,110,58,32,107,101,101,112,45,97,108,105,118,101,13,10,
    // "Transfer-Encoding: chunked\r\n"
    84,114,97,110,115,102,101,114,45,69,110,99,111,100,105,110,103,58,32,99,104,117,110,107,101,100,13,10,
    // Blank line ("\r\n") that ends the header section
    13,10,
    // Chunk size line "c\r\n" (hex c = 12 bytes)
    99,13,10,
    // Chunk data "Hello World\n" followed by the chunk-terminating "\r\n"
    72,101,108,108,111,32,87,111,114,108,100,10,13,10,
    // Zero-length chunk "0\r\n" marking the end of the body
    48,13,10,
    // Final "\r\n"
    13,10
];

// Every byte in this capture is ASCII, so each value maps directly to one
// character. (For non-ASCII payloads a real decoder such as
// Buffer.from(a).toString() would be needed instead.)
const s = String.fromCharCode(...a);
console.debug(s);

输出内容：

HTTP/1.1 200 OK
Content-Type: text/plain
Date: Sun, 28 Feb 2016 03:53:13 GMT
Connection: keep-alive
Transfer-Encoding: chunked

c
Hello World

0

我一直不理解这个c和0是什么意思。尤其是gzip传输的内容，我还是无法理解，按照我的想法，response body体内直接是内容，Hello World就足够了，为什么还多了几个字节？？？
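其实response body体内，是分块（chunked）编码后的数据：因为响应头里有 Transfer-Encoding: chunked，每个分块以一行十六进制长度开头（c 即 12，正是 "Hello World\n" 的字节数），后面跟着该长度的数据和一个CRLF；长度为 0 的分块表示body结束。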

对应文本：

c
Hello World

0

将该文本转化为字符串，为
"c\r\nHello World\n\r\n0"
再转化为二进制，

// Turn the chunked-body text into its list of character codes so it can be
// compared against the tail of the captured response bytes.
// split('') is fine here because every character is ASCII.
const a = "c\r\nHello World\n\r\n0".split('').map((ch) => ch.charCodeAt(0));
console.debug(a);

结果是
[99, 13, 10, 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 10, 13, 10, 48]
将上面的数组a按照http格式排下序，
用到的方法：


/**
 * Convert a string into the array of its UTF-16 code unit values.
 *
 * The array is logged with console.debug (as before) and is now also
 * returned, so callers can use the result instead of reading the console.
 *
 * @param {string} s - text to convert
 * @returns {number[]} one code unit value per character of `s`
 */
function string2unicode(s) {
    const codes = s.split('').map((ch) => ch.charCodeAt(0));
    console.debug(codes);
    return codes;
}
/**
 * Convert an array of UTF-16 code unit values back into a string.
 *
 * The string is logged with console.log (as before) and is now also
 * returned, so callers can use the result instead of reading the console.
 *
 * @param {number[]} unicode - code unit values, one per output character
 * @returns {string} the decoded text
 */
function unicode2string(unicode) {
    // Use an explicit arrow: passing String.fromCharCode directly to map would
    // also feed it the index and the array as extra code units.
    const text = unicode.map((code) => String.fromCharCode(code)).join('');
    console.log(text);
    return text;
}
/**
 * Decimal to hexadecimal.
 *
 * Each value is coerced with Number() and rendered in base 16, lowercase and
 * without zero padding (13 becomes "d", not "0d").
 *
 * @param {*} param - a single value, or an array of values
 * @returns {string|string[]} a hex string, or an array of hex strings when
 *     `param` is an array
 */
function getHexFromNum(param) {
    const toHex = (value) => Number(value).toString(16);
    if (!Array.isArray(param)) {
        return toHex(param);
    }
    return param.map((value) => toHex(value));
}
检测：
// Round-trip check: these 15 codes spell "HTTP/1.1 200 OK" ...
unicode2string([72, 84, 84, 80, 47, 49, 46, 49, 32, 50, 48, 48, 32, 79, 75])
// ... and the same text converts back to the same 15 codes.
string2unicode('HTTP/1.1 200 OK')

// The same status-line codes rendered as hex strings, for comparison with a packet capture.
getHexFromNum([72, 84, 84, 80, 47, 49, 46, 49, 32, 50, 48, 48, 32, 79, 75]) 
16进制结果显示：
["48", "54", "54", "50", "2f", "31", "2e", "31", "20", "32", "30", "30", "20", "4f", "4b"]

对应wireshark可以看到，

在《HTTP权威指南》第3.2章有这样一句话，

起始行和首部就是由行分隔的ASCII文本。每行都以一个由两个字符组成的行终止序列作为结束，其中包括一个回车符（ASCII码13）和一个换行符（ASCII码10）。这个行终止序列可以写做CRLF。

这个CRLF就是上面一张图挂在最后的两个16进制数据(0d 0a),我们对\r\n来进行分析，
string2unicode('\r\n')结果是[13, 10]
getHexFromNum([13,10])结果是["d", "a"].分析结束

The text was updated successfully, but these errors were encountered:

kimown · 2016-03-05T14:53:39Z

/**
 * Hexadecimal to decimal.
 *
 * @param {string|string[]} param - a hex string, or an array of hex strings
 * @returns {number|number[]} the value parsed in base 16, or an array of
 *     parsed values when `param` is an array (NaN where parsing fails)
 */
function getDecFromHex (param) {
    const hexToDec = (hex) => parseInt(hex, 16);
    return Array.isArray(param)
        ? param.map((hex) => hexToDec(hex))
        : hexToDec(param);
}

/**
 * Hexadecimal to decimal to string.
 *
 * Parses each hex string in base 16 and turns the resulting number into the
 * character with that code.
 *
 * @param {string|string[]} param - a hex string, or an array of hex strings
 * @returns {string} a single character for a scalar input, or the
 *     concatenation of one character per element for an array input
 */
function getStrFromDecFromHex (param) {
    const hexToChar = (hex) => String.fromCharCode(parseInt(hex, 16));
    if (!Array.isArray(param)) {
        return hexToChar(param);
    }
    return param.map((hex) => hexToChar(hex)).join('');
}

这个结果很有意思，getStrFromDecFromHex('0d')和getStrFromDecFromHex('0a')不同含义。

经过gzip压缩过，

kimown · 2016-03-05T14:58:35Z

看下未压缩的 Hello World! 😝

getStrFromDecFromHex("48 65 6c 6c 6f 20 57 6f 72 6c 64 21".split(' ')) 输出"Hello World!"

kimown · 2016-03-05T15:02:37Z

看下经过gzip压缩过的,这是32字节

// Convert the 32 hex byte strings of the gzip-compressed body into decimal byte values.
getDecFromHex(["1f", "8b", "08", "00", "00", "00", "00", "00", "00", "03", "f3", "48", "cd", "c9", "c9", "57", "08", "cf", "2f", "ca", "49", "51", "04", "00", "a3", "1c", "29", "1c", "0c", "00", "00", "00"])
输出
[31, 139, 8, 0, 0, 0, 0, 0, 0, 3, 243, 72, 205, 201, 201, 87, 8, 207, 47, 202, 73, 81, 4, 0, 163, 28, 41, 28, 12, 0, 0, 0]

kimown · 2016-03-06T05:50:41Z

我总算明白了一个现象，

// Demonstrates a Buffer pitfall: gzip output does not survive a trip through a string.
var zlib = require('zlib');

var text = "Hello World!";
var buf = new Buffer(text, 'utf-8');   // Encode the text as UTF-8 bytes.
zlib.gzip(buf, function (_, result) {  // result holds the compressed bytes; the first (error) argument is ignored.
    // result.toString() decodes the compressed bytes as text (UTF-8 by default
    // in Node), which is lossy for arbitrary binary data, so `a` is NOT a
    // byte-for-byte copy of `result`.
    // NOTE(review): new Buffer(...) is deprecated in current Node releases in
    // favour of Buffer.from(...).
    var a=new Buffer(result.toString());
    // `err` is never checked; if gunzip rejects the altered bytes, `data` is
    // presumably undefined and the line below prints "undefined" after the label.
    zlib.gunzip(a,(err , data)=>{
        console.log("解压结果"+data);
    })
});

这个result和a根本不是相同的（result.toString()把二进制的gzip数据按字符串解码，再转回Buffer时字节已经变了），坑了坑了，果然对Buffer理解不到位坑我了

kimown · 2016-03-06T05:52:56Z

对于gzip内容的分析，现在我只能通过字符串的ASCII码来判断。

{
  "type": "Buffer",
  "data": [

72,84,84,80,47,49,46,49,32,50,48,48,32,79,75,13,10,
67,111,110,116,101,110,116,45,84,121,112,101,58,32,116,101,120,116,47,112,108,97,105,110,13,10,

67,111,110,116,101,110,116,45,69,110,99,111,100,105,110,103,58,32,103,122,105,112,13,10,

68,97,116,101,58,32,83,117,110,44,32,48,54,32,77,97,114,32,50,48,49,54,32,48,53,58,50,52,58,49,54,32,71,77,84,13,10,

67,111,110,110,101,99,116,105,111,110,58,32,107,101,101,112,45,97,108,105,118,101,13,10,
84,114,97,110,115,102,101,114,45,69,110,99,111,100,105,110,103,58,32,99,104,117,110,107,101,100,

13,10,13,10,

50,48,13,10,
31,139,8,0,0,0,0,0,0,3,243,72,205,201,201,87,8,207,47,202,73,81,4,0,163,28,41,28,12,0,0,0,13,10,
48,
13,10,13,10
  ]
}

kimown · 2016-03-06T14:28:19Z

按照ecmascript中string的split的大概意思模仿出了一个分割数组方法。


// Bytes of the captured gzip-encoded HTTP response, grouped by protocol line.
var a=[

    // "HTTP/1.1 200 OK\r\n"
    72,84,84,80,47,49,46,49,32,50,48,48,32,79,75,13,10,
    // "Content-Type: text/plain\r\n"
    67,111,110,116,101,110,116,45,84,121,112,101,58,32,116,101,120,116,47,112,108,97,105,110,13,10,

    // "Content-Encoding: gzip\r\n"
    67,111,110,116,101,110,116,45,69,110,99,111,100,105,110,103,58,32,103,122,105,112,13,10,

    // "Date: Sun, 06 Mar 2016 05:24:16 GMT\r\n"
    68,97,116,101,58,32,83,117,110,44,32,48,54,32,77,97,114,32,50,48,49,54,32,48,53,58,50,52,58,49,54,32,71,77,84,13,10,

    // "Connection: keep-alive\r\n"
    67,111,110,110,101,99,116,105,111,110,58,32,107,101,101,112,45,97,108,105,118,101,13,10,
    // "Transfer-Encoding: chunked" (its line terminator is on the next row)
    84,114,97,110,115,102,101,114,45,69,110,99,111,100,105,110,103,58,32,99,104,117,110,107,101,100,

    // "\r\n" ending the last header line, then the blank line "\r\n" ending the headers
    13,10,13,10,

    // Chunk size line "20\r\n" (hex 20 = 32 bytes)
    50,48,13,10,
    // The 32 bytes of gzip data, followed by the chunk-terminating "\r\n"
    31,139,8,0,0,0,0,0,0,3,243,72,205,201,201,87,8,207,47,202,73,81,4,0,163,28,41,28,12,0,0,0,13,10,
    // "0": size line of the final, zero-length chunk
    48,
    // "\r\n" after the size line, then the final "\r\n"
    13,10,13,10
];
/**
 * Split an array on every occurrence of a delimiter sub-array, in the same
 * way String.prototype.split splits a string on a separator.
 *
 * - Segments are returned in order; the delimiter itself is not included.
 * - The segment after the last delimiter is always included (possibly empty),
 *   so an array with no delimiter yields a single segment holding a copy of
 *   the whole array.
 * - Matching restarts after each consumed delimiter, so occurrences never
 *   overlap.
 * - Elements are compared with strict equality (===).
 * - An empty delimiter splits the array into one-element segments, mirroring
 *   "abc".split("").
 *
 * Defined as a non-enumerable property so it does not show up in for...in
 * loops over arrays.
 *
 * @param {Array} param - the delimiter sequence to split on
 * @returns {Array[]} the segments between delimiter occurrences
 */
Object.defineProperty(Array.prototype, 'splitByArray', {
    configurable: true,
    writable: true,
    enumerable: false,
    value: function splitByArray(param) {
        if (param.length === 0) {
            return this.map((item) => [item]);
        }

        const result = [];
        let segmentStart = 0;
        let i = 0;
        // Stop once the delimiter can no longer fit in what remains.
        while (i + param.length <= this.length) {
            const isMatch = param.every((val, offset) => this[i + offset] === val);
            if (isMatch) {
                result.push(this.slice(segmentStart, i));
                // Jump past the whole delimiter so the next match cannot overlap it.
                i += param.length;
                segmentStart = i;
            } else {
                i += 1;
            }
        }
        // Trailing segment: whatever follows the last delimiter.
        result.push(this.slice(segmentStart));
        return result;
    },
});

// Split the captured response bytes on CRLF (13, 10) into per-line byte arrays.
a.splitByArray([13,10]);

kimown · 2016-03-12T09:43:36Z

参考fiddler的文件替换的首部，
HTTP/1.1 200 OK
Date: Sat, 12 Mar 2016 09:32:57 GMT
Content-Length: 3
Cache-Control: max-age=0, must-revalidate
Content-Type: text/plain

123
其中需要注意的就是这个Date首部.

// Print the current time via Date.prototype.toUTCString(), which produces text
// in the same shape as the Date header above (e.g. "Sat, 12 Mar 2016 09:32:57 GMT").
var d = new Date()
console.log(d.toUTCString())

http://w3school.com.cn/jsref/jsref_toUTCString.asp

kimown · 2016-03-20T03:45:07Z

在http首部中，有Content-Length属性，它标识body体的长度，但是这个长度不是body体字符串的长度，而是字节的长度，在nodejs中即将字符串转化为buffer流的length，如果body的字节长度小于length，则客户端继续等待数据；如果大于length，则浏览器截取length长度的数据。

参考：
http://cnodejs.org/topic/51dcc6b9d44cbfa304269cff
https://cnodejs.org/topic/4fabf73aa016fe532302738c
http://cnodejs.org/topic/50616f5301d0b801482695cf

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

对http协议格式的分析 #63

对http协议格式的分析 #63

kimown commented Feb 28, 2016

kimown commented Mar 5, 2016

kimown commented Mar 5, 2016

kimown commented Mar 5, 2016

kimown commented Mar 6, 2016

kimown commented Mar 6, 2016

kimown commented Mar 6, 2016

kimown commented Mar 12, 2016

kimown commented Mar 20, 2016

对http协议格式的分析 #63

对http协议格式的分析 #63

Comments

kimown commented Feb 28, 2016

kimown commented Mar 5, 2016

kimown commented Mar 5, 2016

kimown commented Mar 5, 2016

kimown commented Mar 6, 2016

kimown commented Mar 6, 2016

kimown commented Mar 6, 2016

kimown commented Mar 12, 2016

kimown commented Mar 20, 2016