我在测试用google的CryptoJS库来计算文件的md5和sha1。
最简单的异步调用
// One-shot hashing: load the entire file into a single ArrayBuffer, then
// compute its MD5 (hex string) and SHA-1 (Base64 string) from that buffer.
// `_file` and `currentFile` are not declared in this snippet; they come from
// the surrounding page code.
var fr = new FileReader;
fr.onload = function (event) {
    // Wrap the raw buffer once; both digests are computed over the same data.
    var words = CryptoJS.lib.WordArray.create(fr.result);
    var md5Hex = CryptoJS.MD5(words).toString();
    var sha1Base64 = CryptoJS.SHA1(words).toString(CryptoJS.enc.Base64);
    console.log(md5Hex);
    // The three report lines are separated by "<p>" tags.
    var report = [
        "file " + currentFile.name + " is read",
        "md5 is " + md5Hex,
        "sha1 is " + sha1Base64
    ].join("<p>");
    document.getElementById("message").innerHTML = report;
};
fr.readAsArrayBuffer(_file);
这样是没有问题的。
但是当我计算上百兆的文件的时候,一次性的load文件,浏览器就会崩掉,下面是改进代码,区别是slice文件后,读取每一个blob,计算每一个blob的md5,sha1,当读完所有blob后,就计算出结果来。
/**
 * Hashes a file in 102400-byte slices with CryptoJS's incremental MD5 and
 * SHA-1 hashers and writes the result into the element with id "message".
 *
 * Every slice gets its own FileReader and all reads are started right away,
 * without waiting for earlier ones to finish. Each load handler feeds its
 * slice to the hashers, so the digests are updated in whatever order the
 * load events fire. A timer polls every 500 ms until no read is outstanding
 * and then prints the MD5 (hex) and SHA-1 (Base64).
 *
 * @param {File|Blob} _file - file to hash; its `size`, `slice` and (via the
 *     outer `currentFile` variable) `name` are used.
 */
function readFile(_file)
{
    // `currentFile` is not declared here; it lives outside this function.
    currentFile = _file;

    var totalBytes = _file.size;
    var md5Hasher = CryptoJS.algo.MD5.create();
    var sha1Hasher = CryptoJS.algo.SHA1.create();
    var pendingReads = 0; // reads started whose load handler has not run yet

    // Start an asynchronous read of one slice; its load handler updates both hashers.
    function startChunkRead(chunk)
    {
        var reader = new FileReader;
        reader.onload = function (event) {
            var words = CryptoJS.lib.WordArray.create(reader.result);
            md5Hasher.update(words);
            sha1Hasher.update(words);
            pendingReads -= 1;
        };
        reader.readAsArrayBuffer(chunk);
    }

    // Poll until every started read has reported back, then show the digests.
    function reportWhenDone()
    {
        if (pendingReads != 0) {
            setTimeout(reportWhenDone, 500);
            return;
        }
        var md5Hex = md5Hasher.finalize().toString();
        var sha1Base64 = sha1Hasher.finalize().toString(CryptoJS.enc.Base64);
        document.getElementById("message").innerHTML = "file " + currentFile.name + " is read" + "<p>" + "md5 is " + md5Hex
            + "<p>" + "sha1 is " + sha1Base64;
    }

    // Kick off a read for every slice of the file, back to back.
    var chunkStart = 0;
    var chunkEnd = 0;
    while (chunkEnd != totalBytes) {
        chunkEnd = Math.min(chunkStart + 102400, totalBytes);
        startChunkRead(_file.slice(chunkStart, chunkEnd));
        chunkStart = chunkEnd;
        pendingReads += 1;
    }

    setTimeout(reportWhenDone, 500);
}
但由于各个异步读取完成的先后顺序不固定,而增量哈希的结果依赖于update的调用顺序,导致各个浏览器的计算结果不同:chrome的结果正确,ie和firefox的结果错误,而且两者的错误结果也互不相同。
于是我改进下,就是当前一个blob读取调用完成后,才进行下一个blob的读取,这样就没有问题了。
/**
 * Computes the MD5 and SHA-1 digests of a file by reading it in 102400-byte
 * slices and feeding each slice to CryptoJS's incremental hashers.
 *
 * Only one slice is in flight at a time: the next read is started from the
 * load handler of the previous one, so the hashers always receive the bytes
 * in file order. When the last slice has been hashed, the MD5 (hex) and
 * SHA-1 (Base64) are written into the element with id "message". An empty
 * file is reported immediately with the digests of empty input, and a failed
 * read is reported in the same element instead of leaving the page waiting.
 *
 * @param {File|Blob} _file - file to hash; its `size`, `name` and `slice`
 *     are used.
 */
function readFile(_file)
{
    // `currentFile` is not declared in this function. It is still assigned so
    // that any outside code relying on it keeps working.
    currentFile = _file;

    var CHUNK_SIZE = 102400; // bytes requested per FileReader call
    var fileSize = _file.size;
    // Captured now so a later readFile() call cannot change the name shown for this file.
    var fileName = _file.name;
    var startByte = 0;
    var md5_cal = CryptoJS.algo.MD5.create();
    var sha1_cal = CryptoJS.algo.SHA1.create();

    // The file name is inserted as HTML below, so neutralise markup characters in it.
    function escapeHtml(text)
    {
        return String(text)
            .replace(/&/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;");
    }

    function showMessage(html)
    {
        document.getElementById("message").innerHTML = html;
    }

    // Finalize both hashers and display the digests.
    function printResult()
    {
        var md5 = md5_cal.finalize().toString();
        var base64 = sha1_cal.finalize().toString(CryptoJS.enc.Base64);
        showMessage("file " + escapeHtml(fileName) + " is read" + "<p>" + "md5 is " + md5
            + "<p>" + "sha1 is " + base64);
    }

    // Read the slice starting at startByte, hash it, then continue with the next slice or finish.
    function readNextChunk()
    {
        var endByte = Math.min(startByte + CHUNK_SIZE, fileSize);
        var fr = new FileReader();
        fr.onload = function () {
            var binary = CryptoJS.lib.WordArray.create(fr.result);
            md5_cal.update(binary);
            sha1_cal.update(binary);
            // Advance only after this slice is hashed, so the position never
            // depends on when the load event happens to fire.
            startByte = endByte;
            if (startByte < fileSize) {
                readNextChunk();
            } else {
                printResult();
            }
        };
        fr.onerror = function () {
            // Without this the page would wait forever for a result that never comes.
            showMessage("file " + escapeHtml(fileName) + " could not be read");
        };
        fr.readAsArrayBuffer(_file.slice(startByte, endByte));
    }

    if (fileSize > 0) {
        readNextChunk();
    } else {
        // Nothing to read: report the digests of empty input right away.
        printResult();
    }
}
代码很丑陋,不过可以说明问题就好。