我想将非常大的文件读入node.js中的JavaScript数组。
因此,如果文件是这样的:
first line
two
three
...
...
我将拥有数组:
['first line','two','three', ... , ... ]
该函数将如下所示:
var array = load(filename);
因此,将其全部加载为字符串然后拆分的想法是不可接受的。
我想将非常大的文件读入node.js中的JavaScript数组。
因此,如果文件是这样的:
first line
two
three
...
...
我将拥有数组:
['first line','two','three', ... , ... ]
该函数将如下所示:
var array = load(filename);
因此,将其全部加载为字符串然后拆分的想法是不可接受的。
Answers:
如果最终的数据能够放进一个数组,那么它难道不能像建议的那样先放进一个字符串再拆分吗?无论如何,如果您希望一次只处理文件中的一行,也可以尝试如下做法:
var fs = require('fs');
/**
 * Splits the content of a stream into lines and passes each line to a callback.
 *
 * Buffer chunks are decoded through a single streaming UTF-8 decoder, so a
 * multi-byte character that straddles two chunks is reassembled instead of
 * being replaced by U+FFFD on both sides of the boundary.
 *
 * @param {object} input - Emitter of 'data' and 'end' events (for example the
 *   result of fs.createReadStream). Chunks may be Buffers or strings.
 * @param {function(string): void} func - Called once per line, without the
 *   terminating '\n'. A non-empty tail without '\n' is delivered on 'end'.
 */
function readLines(input, func) {
  var remaining = '';
  // ignoreBOM keeps a leading U+FEFF in the text, exactly as Buffer#toString() does.
  var decoder = new TextDecoder('utf-8', { ignoreBOM: true });
  input.on('data', function(data) {
    // Only Buffers need decoding; anything else is concatenated as before.
    remaining += Buffer.isBuffer(data) ? decoder.decode(data, { stream: true }) : data;
    var index = remaining.indexOf('\n');
    while (index > -1) {
      var line = remaining.substring(0, index);
      remaining = remaining.substring(index + 1);
      func(line);
      index = remaining.indexOf('\n');
    }
  });
  input.on('end', function() {
    // Flush whatever an incomplete trailing byte sequence left inside the decoder.
    remaining += decoder.decode();
    if (remaining.length > 0) {
      func(remaining);
    }
  });
}
/**
 * Writes one line to stdout, prefixed with "Line: ".
 *
 * @param {*} data - The line to print.
 */
function func(data) {
  var message = 'Line: ' + data;
  console.log(message);
}
// Open lines.txt as a read stream and hand every line of it to func via readLines.
var input = fs.createReadStream('lines.txt');
readLines(input, func);
编辑:(回应 phopkins 的评论)我认为(至少在较新的版本中)substring 不会复制数据,而是创建一个特殊的 SlicedString 对象(这是快速浏览 v8 源代码得出的结论)。无论如何,下面是一个避免上述 substring 用法的修改版本(已在一个几兆字节、内容为 “All work and no play makes Jack a dull boy” 的文件上测试过):
/**
 * Splits the content of a stream into lines and passes each line to a callback.
 *
 * This variant scans each accumulated chunk with a moving start index and
 * trims the buffer once per chunk instead of once per line.
 *
 * Buffer chunks are decoded through a single streaming UTF-8 decoder, so a
 * multi-byte character that straddles two chunks is reassembled instead of
 * being replaced by U+FFFD on both sides of the boundary.
 *
 * @param {object} input - Emitter of 'data' and 'end' events (for example the
 *   result of fs.createReadStream). Chunks may be Buffers or strings.
 * @param {function(string): void} func - Called once per line, without the
 *   terminating '\n'. A non-empty tail without '\n' is delivered on 'end'.
 */
function readLines(input, func) {
  var remaining = '';
  // ignoreBOM keeps a leading U+FEFF in the text, exactly as Buffer#toString() does.
  var decoder = new TextDecoder('utf-8', { ignoreBOM: true });
  input.on('data', function(data) {
    // Only Buffers need decoding; anything else is concatenated as before.
    remaining += Buffer.isBuffer(data) ? decoder.decode(data, { stream: true }) : data;
    var index = remaining.indexOf('\n');
    var last = 0;
    while (index > -1) {
      var line = remaining.substring(last, index);
      last = index + 1;
      func(line);
      index = remaining.indexOf('\n', last);
    }
    // Keep only the unterminated tail for the next chunk.
    remaining = remaining.substring(last);
  });
  input.on('end', function() {
    // Flush whatever an incomplete trailing byte sequence left inside the decoder.
    remaining += decoder.decode();
    if (remaining.length > 0) {
      func(remaining);
    }
  });
}
var fs = require('fs');
// Read the whole file synchronously, split it on '\n' and print every line.
var array = fs.readFileSync('file.txt').toString().split("\n");
// forEach visits only the array elements, in order, and does not leak a
// global loop index the way `for (i in array)` did.
array.forEach(function(line) {
  console.log(line);
});
var fs = require('fs');
// Asynchronous variant: read the whole file, split it on '\n' and print every line.
fs.readFile('file.txt', function(err, data) {
  if (err) throw err;
  var array = data.toString().split("\n");
  // forEach visits only the array elements, in order, and does not leak a
  // global loop index the way `for (i in array)` did.
  array.forEach(function(line) {
    console.log(line);
  });
});
使用Node.js readline模块。
var fs = require('fs');
var readline = require('readline');
// The file to read is taken from the first command-line argument.
var filename = process.argv[2];
// readline splits the stream into lines; each one is printed as it arrives.
readline
  .createInterface({
    input: fs.createReadStream(filename),
    terminal: false
  })
  .on('line', (line) => {
    console.log(`Line: ${line}`);
  });
\n
!请参阅:stackoverflow.com/questions/18450197/...
js:
// Read file.txt with the 'utf8' encoding argument and split the result on '\n' into an array.
var array = fs.readFileSync('file.txt', 'utf8').split('\n');
ts:
// Same read as the plain JS form, with an explicit toString() call before splitting on '\n'.
var array = fs.readFileSync('file.txt', 'utf8').toString().split('\n');
如果出现 TypeError: fs.readFileSync(...).split is not a function,应使用 .toString(),如下:var array = fs.readFileSync('file.txt', 'utf8').toString().split('\n');
使用readline(文档)。这是一个读取css文件,解析图标并将其写入json的示例
// Accumulates the strings pushed by the 'line' handler below.
var results = [];
// readline interface reading ./assets/stylesheets/_icons.scss through a read stream.
var rl = require('readline').createInterface({
input: require('fs').createReadStream('./assets/stylesheets/_icons.scss')
});
// Collect the icon selector from every line that contains one.
// The pattern is crude: it grabs everything from ".icon-icon" up to the last ":" on the line.
rl.on('line', (line) => {
  const found = /\.icon-icon.*:/.exec(line);
  if (found === null) {
    return;
  }
  // String#replace with a string pattern removes only the first "." and the first ":".
  const iconName = found[0].replace(".", '').replace(":", '');
  results.push(iconName);
});
// readline emits 'close' once the file has been read; write the collected names out then.
rl.on('close', () => {
  const outputFilename = './icons.json';
  const payload = JSON.stringify(results, null, 2);
  fs.writeFile(outputFilename, payload, (err) => {
    if (err) {
      console.log(err);
      return;
    }
    console.log("JSON saved to " + outputFilename);
  });
});
使用BufferedReader,但是函数应该是异步的:
/**
 * Reads `file` through a BufferedReader and delivers all of its lines at once.
 *
 * @param {*} file - Passed straight to the BufferedReader constructor.
 * @param {function(*, ?Array): void} cb - Called with (error, null) when the
 *   reader emits 'error', and with (null, lines) when it emits 'end'.
 */
var load = function (file, cb) {
  var collected = [];
  var onError = function (error) {
    cb(error, null);
  };
  var onLine = function (line) {
    collected.push(line);
  };
  var onEnd = function () {
    cb(null, collected);
  };
  new BufferedReader(file, { encoding: "utf8" })
    .on("error", onError)
    .on("line", onLine)
    .on("end", onEnd)
    .read();
};
// Demo call: print the error when loading failed, otherwise print the lines.
load("file", function (error, lines) {
  console.log(error ? error : lines);
});
这是@mtomis对上面答案的一种变体。
它创建了一个按行输出的流。它会发出 'data' 和 'end' 事件,使您能够处理流的结尾。
var events = require('events');
/**
 * Event emitter that wraps a stream and re-emits its content line by line.
 *
 * Emits 'data' once per line (without the terminating '\n'), including a
 * non-empty unterminated tail, and then 'end' once the input has ended.
 *
 * Buffer chunks are decoded through a single streaming UTF-8 decoder, so a
 * multi-byte character that straddles two chunks is not corrupted.
 *
 * @param {object} input - Emitter of 'data' and 'end' events. Chunks may be
 *   Buffers or strings.
 */
var LineStream = function (input) {
  // Run the EventEmitter constructor so every instance owns its listener
  // table; without it all LineStream objects register on the same one.
  events.EventEmitter.call(this);
  var self = this;
  var remaining = '';
  // ignoreBOM keeps a leading U+FEFF in the text, exactly as Buffer#toString() does.
  var decoder = new TextDecoder('utf-8', { ignoreBOM: true });
  input.on('data', function (data) {
    remaining += Buffer.isBuffer(data) ? decoder.decode(data, { stream: true }) : data;
    var index = remaining.indexOf('\n');
    var last = 0;
    while (index > -1) {
      var line = remaining.substring(last, index);
      last = index + 1;
      self.emit('data', line);
      index = remaining.indexOf('\n', last);
    }
    // Keep only the unterminated tail for the next chunk.
    remaining = remaining.substring(last);
  });
  input.on('end', function () {
    // Flush whatever an incomplete trailing byte sequence left inside the decoder.
    remaining += decoder.decode();
    if (remaining.length > 0) {
      self.emit('data', remaining);
    }
    self.emit('end');
  });
};
// Inherit from EventEmitter.prototype rather than from one shared
// EventEmitter instance, whose state would be common to all LineStreams.
LineStream.prototype = Object.create(events.EventEmitter.prototype, {
  constructor: { value: LineStream, writable: true, configurable: true }
});
用作包装器:
// Wrap the raw stream, then consume it one line at a time.
var lineInput = new LineStream(input);
lineInput.on('data', (line) => {
  // handle line
});
lineInput.on('end', () => {
  // wrap it up
});
// Alternative inheritance pattern: the constructor calls EventEmitter on each new instance, and util.inherits links GoodEmitter to EventEmitter.
var EventEmitter = require('events').EventEmitter; var util = require('util'); function GoodEmitter() { EventEmitter.call(this); } util.inherits(GoodEmitter, EventEmitter);
// Create one LineStream per input stream.
var li1 = new LineStream(input1), li2 = new LineStream(input2);
然后计算每一次触发“结束”的次数
// Read the same file through two separate LineStream instances and log what each one emits.
var fs = require('fs');

var input1 = fs.createReadStream('text.txt');
var ls1 = new LineStream(input1);
ls1.on('data', (line) => {
  console.log(`1:line=${line}`);
});
ls1.on('end', () => {
  console.log('1:fin');
});

var input2 = fs.createReadStream('text.txt');
var ls2 = new LineStream(input2);
ls2.on('data', (line) => {
  console.log(`2:line=${line}`);
});
ls2.on('end', () => {
  console.log('2:fin');
});
输出:文本文件中的每一行为每个实例触发一次。“结束”也是如此。
我遇到了同样的问题,并且已经用 line-by-line 模块解决了这个问题
https://www.npmjs.com/package/line-by-line
至少对我而言,无论是在同步还是异步模式下,它的工作方式都非常吸引人。
此外,对于行末不以 \n 结尾的问题,可以使用以下选项解决:
{ encoding: 'utf8', skipEmptyLines: false }
线的同步处理:
// Load the line-by-line module and open big_file.txt with it.
var LineByLineReader = require('line-by-line');
var lr = new LineByLineReader('big_file.txt');

lr.on('error', (err) => {
  // `err` holds the error object.
});

lr.on('line', (line) => {
  // `line` holds the current line, without its trailing newline character.
});

lr.on('end', () => {
  // Every line has been read and the file is closed at this point.
});
Node.js v8 或更高版本提供了一项新功能(util.promisify),可以将普通的回调式函数转换为异步(返回 Promise 的)函数。
这是一个了不起的功能。下面的示例将 txt 文件中的 10000 个数字解析为数组,并使用归并排序计算这些数字的逆序数。
// read from txt file
const util = require('util');
const fs = require('fs')
// Store a util.promisify-wrapped fs.readFile on the fs object under the name readFileAsync.
fs.readFileAsync = util.promisify(fs.readFile);
// Module-level accumulator; parseTxt pushes the parsed numbers into it.
let result = []
/**
 * Reads a text file holding one number per line and appends the numbers to
 * the module-level `result` array.
 *
 * Both "\n" and "\r\n" line endings are accepted; empty lines are skipped.
 *
 * @param {*} csvFile - Path passed to fs.readFileAsync.
 * @returns {Promise<number[]>} Resolves to `result` once the numbers have been appended.
 */
const parseTxt = async (csvFile) => {
  const data = await fs.readFileAsync(csvFile)
  const str = data.toString()
  // Splitting on '\r\n' alone left LF-terminated files as one giant "line",
  // which Number() then turned into NaN.
  const lines = str.split(/\r?\n/)
  console.log("lines", lines)
  for (const line of lines) {
    if (!line) { continue }
    result.push(Number(line))
  }
  console.log("result", result)
  return result
}
// Parse the input file, then print what mergeSort returns for the parsed numbers.
parseTxt('./count-inversion.txt')
  .then(() => {
    console.log(mergeSort({arr: result, count: 0}))
  })
  .catch((err) => {
    // Report read/parse/sort failures explicitly instead of leaving the
    // rejection unhandled, and still signal failure through the exit code.
    console.error(err)
    process.exitCode = 1
  })
要将大文件读入数组,可以逐行或逐块读取。
逐行参考我的答案
// One declaration per statement: the original ended the `es` declarator with a
// comma directly before `var lines`, which is a syntax error.
var fs = require('fs');
var es = require('event-stream');
// Receives every line read by the snippets that follow.
var lines = [];
// Pipe the file through es.split() and then es.mapSync(), pushing each value the callback receives into `lines`.
// `s` holds whatever the final .pipe() call returns.
var s = fs.createReadStream('filepath')
.pipe(es.split())
.pipe(es.mapSync(function(line) {
// Pause `s`, store the line, then resume.
// NOTE(review): `s` is the result of the last .pipe(), presumably not the fs read stream itself — confirm which stream is meant to be paused.
s.pause();
lines.push(line);
s.resume();
})
// NOTE(review): the handlers below are attached to the object returned by es.mapSync(), inside the .pipe() argument — verify that errors from the fs read stream reach them.
.on('error', function(err) {
console.log('Error:', err);
})
.on('end', function() {
console.log('Finish reading.');
console.log(lines);
})
);
逐块参考本文
// Read 'filepath' synchronously in fixed-size chunks and append every line to `lines`.
var offset = 0;
var chunkSize = 2048;
// Buffer.alloc replaces the deprecated `new Buffer(size)` constructor.
var chunkBuffer = Buffer.alloc(chunkSize);
var fp = fs.openSync('filepath', 'r');
var bytesRead = 0;
// Bytes that follow the last newline seen so far: the start of a line that is
// not complete yet. Carrying them over (instead of rewinding `offset` by a
// string length) stays correct for multi-byte characters and for lines that
// are longer than one chunk.
var pending = Buffer.alloc(0);
try {
  while ((bytesRead = fs.readSync(fp, chunkBuffer, 0, chunkSize, offset)) > 0) {
    offset += bytesRead;
    // Buffer.concat copies, so `pending` never aliases the reused chunkBuffer.
    var data = Buffer.concat([pending, chunkBuffer.subarray(0, bytesRead)]);
    // Cut at the newline byte so no character is decoded in two halves.
    var lastNewline = data.lastIndexOf(0x0a);
    if (lastNewline === -1) {
      // No complete line yet; keep accumulating.
      pending = data;
      continue;
    }
    var arr = data.toString('utf8', 0, lastNewline).split('\n');
    // Append the lines themselves, not the per-chunk array.
    lines.push(...arr);
    pending = data.subarray(lastNewline + 1);
  }
  // A final line without a trailing newline.
  if (pending.length > 0) {
    lines.push(pending.toString('utf8'));
  }
} finally {
  // Release the descriptor even when reading throws.
  fs.closeSync(fp);
}
console.log(lines);