在Swift中逐行读取文件/ URL


79

我正在尝试读取由NSURL给定的文件,并将其加载到数组中,数组的各项以换行符\n分隔。

到目前为止,这是我做的方法:

// Asker's current approach: load the whole file behind filePath.URL into a single string.
var possList: NSString? = NSString.stringWithContentsOfURL(filePath.URL) as? NSString
if var list = possList {
    // Split the complete contents on "\n" in one step.
    list = list.componentsSeparatedByString("\n") as NSString[]
    return list
}
else {
    //return empty list
}

我对此不太满意,原因有两个。第一,我正在处理的文件大小从几千字节到几百MB不等。可以想象,使用如此大的字符串是缓慢且笨拙的。其次,这会在执行时冻结UI,这同样是不好的。

我已经考虑过在单独的线程中运行此代码,但是我一直在遇到麻烦,此外,它仍然不能解决处理巨大字符串的问题。

我想做的事情与以下伪代码类似:

var aStreamReader = new StreamReader(from_file_or_url)
while aStreamReader.hasNextLine == true {
    currentline = aStreamReader.nextLine()
    list.addItem(currentline)
}

我将如何在Swift中完成此任务?

关于我正在读取的文件的一些注意事项:所有文件均由短字符串(<255个字符)组成,以\n或\r\n分隔。文件的长度从大约100行到超过5000万行不等。它们可能包含欧洲字符和/或带有重音符号的字符。


您是想边读取边把数组写到磁盘上,还是交给操作系统用内存来处理?运行它的Mac是否有足够的内存,让您可以映射该文件并以这种方式使用它?多任务很容易实现,我想您可以让多个作业从文件的不同位置开始读取。
macshome 2014年

Answers:


150

(该代码现在适用于Swift 2.2 / Xcode 7.3。如果有人需要,可以在编辑历史记录中找到旧版本。最后提供了Swift 3的更新版本。)

以下Swift代码从如何逐行从NSFileHandle中读取数据的各种答案中获得了很大的启发 。它从文件中分块读取,并将完整的行转换为字符串。

可以使用可选参数设置默认的行定界符(\n)、字符串编码(UTF-8)和块大小(4096)。

/// Reads a file in fixed-size chunks and returns it one delimiter-separated line at a time.
class StreamReader  {

    /// String encoding used for the delimiter and for decoding each line.
    let encoding : UInt
    /// Number of bytes requested from the file handle per read.
    let chunkSize : Int

    /// Handle of the file being read; set to nil by `close()`.
    var fileHandle : NSFileHandle!
    /// Bytes read from the file that have not yet been returned as a line.
    let buffer : NSMutableData!
    /// The line delimiter encoded with `encoding`.
    let delimData : NSData!
    /// Set once a read returned no data; `nextLine()` then returns nil until `rewind()`.
    var atEof : Bool = false

    /// Opens the file at `path` for reading.
    ///
    /// Fails (returns nil) if the file cannot be opened, the delimiter cannot be
    /// encoded with `encoding`, or the buffer cannot be allocated.
    init?(path: String, delimiter: String = "\n", encoding : UInt = NSUTF8StringEncoding, chunkSize : Int = 4096) {
        self.chunkSize = chunkSize
        self.encoding = encoding

        if let fileHandle = NSFileHandle(forReadingAtPath: path),
            delimData = delimiter.dataUsingEncoding(encoding),
            buffer = NSMutableData(capacity: chunkSize)
        {
            self.fileHandle = fileHandle
            self.delimData = delimData
            self.buffer = buffer
        } else {
            // Every stored property is assigned before failing.
            self.fileHandle = nil
            self.delimData = nil
            self.buffer = nil
            return nil
        }
    }

    deinit {
        self.close()
    }

    /// Return next line, or nil on EOF.
    ///
    /// The returned string does not include the delimiter. The result is also nil
    /// when the line's bytes cannot be decoded with `encoding`.
    func nextLine() -> String? {
        precondition(fileHandle != nil, "Attempt to read from closed file")

        if atEof {
            return nil
        }

        // Read data chunks from file until a line delimiter is found:
        var range = buffer.rangeOfData(delimData, options: [], range: NSMakeRange(0, buffer.length))
        while range.location == NSNotFound {
            let tmpData = fileHandle.readDataOfLength(chunkSize)
            if tmpData.length == 0 {
                // EOF or read error.
                atEof = true
                if buffer.length > 0 {
                    // Buffer contains last line in file (not terminated by delimiter).
                    let line = NSString(data: buffer, encoding: encoding)

                    buffer.length = 0
                    return line as String?
                }
                // No more lines.
                return nil
            }
            buffer.appendData(tmpData)
            // Search the whole buffer again now that it has grown by one chunk.
            range = buffer.rangeOfData(delimData, options: [], range: NSMakeRange(0, buffer.length))
        }

        // Convert complete line (excluding the delimiter) to a string:
        let line = NSString(data: buffer.subdataWithRange(NSMakeRange(0, range.location)),
            encoding: encoding)
        // Remove line (and the delimiter) from the buffer:
        buffer.replaceBytesInRange(NSMakeRange(0, range.location + range.length), withBytes: nil, length: 0)

        return line as String?
    }

    /// Start reading from the beginning of file.
    ///
    /// Also discards buffered bytes and clears the EOF flag.
    func rewind() -> Void {
        fileHandle.seekToFileOffset(0)
        buffer.length = 0
        atEof = false
    }

    /// Close the underlying file. No reading must be done after calling this method.
    func close() -> Void {
        // Optional chaining makes a second call a no-op.
        fileHandle?.closeFile()
        fileHandle = nil
    }
}

用法:

// The initializer is failable, so bind the reader only when the file could be opened.
if let aStreamReader = StreamReader(path: "/path/to/file") {
    // Close the file on every exit path out of this scope.
    defer {
        aStreamReader.close()
    }
    // nextLine() returns nil at end of file, which ends the loop.
    while let line = aStreamReader.nextLine() {
        print(line)
    }
}

您甚至可以将阅读器与for-in循环一起使用

// Requires StreamReader to conform to SequenceType.
for line in aStreamReader {
    print(line)
}

通过实施SequenceType协议(比较http://robots.thoughtbot.com/swift-sequences):

/// Lets a StreamReader be used in a for-in loop.
extension StreamReader : SequenceType {
    /// Returns a generator whose every step calls `nextLine()` on this reader,
    /// so iterating advances the reader itself.
    func generate() -> AnyGenerator<String> {
        return AnyGenerator {
            return self.nextLine()
        }
    }
}

Swift 3 / Xcode 8 beta 6更新:还要“现代化”使用guard和新 Data值类型:

/// Reads a file in fixed-size chunks and returns it one delimiter-separated line at a time.
class StreamReader {

    /// Encoding used for the delimiter and for decoding each line.
    let encoding: String.Encoding
    /// Number of bytes requested from the file handle per read.
    let chunkSize: Int
    /// Handle of the file being read; nil once `close()` has been called.
    var fileHandle: FileHandle!
    /// The line delimiter encoded with `encoding`.
    let delimData: Data
    /// Bytes already read from the file but not yet returned as a line.
    var buffer: Data
    /// True once a read returned no data.
    var atEof: Bool

    /// Opens the file at `path`. Fails if the file cannot be opened or the
    /// delimiter cannot be represented in `encoding`.
    init?(path: String, delimiter: String = "\n", encoding: String.Encoding = .utf8,
          chunkSize: Int = 4096) {
        guard let handle = FileHandle(forReadingAtPath: path) else { return nil }
        guard let separator = delimiter.data(using: encoding) else { return nil }

        self.encoding = encoding
        self.chunkSize = chunkSize
        fileHandle = handle
        delimData = separator
        buffer = Data(capacity: chunkSize)
        atEof = false
    }

    deinit {
        close()
    }

    /// Return next line, or nil on EOF.
    ///
    /// The delimiter is not part of the returned string. The result is also nil
    /// when the line's bytes cannot be decoded with `encoding`.
    func nextLine() -> String? {
        precondition(fileHandle != nil, "Attempt to read from closed file")

        while !atEof {
            // A complete line is already buffered: cut it out and return it.
            if let delimiterRange = buffer.range(of: delimData) {
                let lineBytes = buffer.subdata(in: 0..<delimiterRange.lowerBound)
                buffer.removeSubrange(0..<delimiterRange.upperBound)
                return String(data: lineBytes, encoding: encoding)
            }

            // Otherwise pull in another chunk and search again.
            let chunk = fileHandle.readData(ofLength: chunkSize)
            guard chunk.isEmpty else {
                buffer.append(chunk)
                continue
            }

            // EOF or read error.
            atEof = true
            guard !buffer.isEmpty else { break }

            // Whatever is left is the final line, which had no trailing delimiter.
            let lastLine = String(data: buffer, encoding: encoding)
            buffer.count = 0
            return lastLine
        }
        return nil
    }

    /// Start reading from the beginning of file.
    func rewind() {
        fileHandle.seek(toFileOffset: 0)
        buffer.count = 0
        atEof = false
    }

    /// Close the underlying file. No reading must be done after calling this method.
    func close() {
        fileHandle?.closeFile()
        fileHandle = nil
    }
}

/// Lets a StreamReader be used in a for-in loop; iterating advances the reader itself.
extension StreamReader: Sequence {
    func makeIterator() -> AnyIterator<String> {
        return AnyIterator { self.nextLine() }
    }
}

1
@Matt:没关系。您可以将扩展名与“主类”放在相同的Swift文件中,也可以放在单独的文件中。-实际上,您实际上不需要扩展。您可以将该generate()函数添加到StreamReader类中,并声明为class StreamReader : Sequence { ... }。但是将扩展用于单独的功能似乎是一种很好的Swift风格。
Martin R

1
@zanzoken:您使用哪种URL?上面的代码仅适用于文件URL。它不能用于从常规服务器URL读取。比较stackoverflow.com/questions/26674182/…和我在问题下的评论。
Martin R

2
@zanzoken:我的代码用于文本文件,并且希望该文件使用指定的编码(默认为UTF-8)。如果您有一个带有任意二进制字节的文件(例如图像文件),那么data-> string转换将失败。
Martin R

1
@zanzoken:从图像读取扫描线是一个完全不同的主题,与这段代码无关。我敢肯定,例如可以使用CoreGraphics方法来完成此操作,但是我没有即时参考。
马丁·R

2
@DCDCwhile !aStreamReader.atEof { try autoreleasepool { guard let line = aStreamReader.nextLine() else { return } ...code... } }
Eporediese

25

高效便捷的类,用于逐行读取文本文件(Swift 4,Swift 5)

注意:此代码与平台无关(macOS,iOS,ubuntu)

import Foundation

/// Read text file line by line in efficient way
///
/// Lines are produced by `getline(3)`, so each returned string still carries
/// its line terminator (if the file had one at that point).
public class LineReader {
   /// Path of the file being read.
   public let path: String

   /// Open C stream; always valid for the lifetime of the instance.
   fileprivate let file: UnsafeMutablePointer<FILE>

   /// Opens the file at `path` for reading.
   ///
   /// - Returns: nil when the file cannot be opened.
   public init?(path: String) {
      // Fail before anything is stored, so deinit can never see a NULL stream.
      guard let file = fopen(path, "r") else { return nil }
      self.path = path
      self.file = file
   }

   /// The next line of the file, or nil at end of file or on a read error.
   public var nextLine: String? {
      var line: UnsafeMutablePointer<CChar>? = nil
      var linecap: Int = 0
      // getline() allocates the buffer; it is released once the String copy exists.
      defer { free(line) }
      guard getline(&line, &linecap, file) > 0, let cString = line else { return nil }
      return String(cString: cString)
   }

   deinit {
      fclose(file)
   }
}

/// Lets a LineReader be used in a for-in loop; iterating advances the reader itself.
extension LineReader: Sequence {
   public func makeIterator() -> AnyIterator<String> {
      return AnyIterator<String> {
         return self.nextLine
      }
   }
}

用法:

// The initializer is failable: bail out when the file cannot be opened.
guard let reader = LineReader(path: "/Path/to/file.txt") else {
    return; // cannot open file
}

// Each line is trimmed of surrounding whitespace and newlines before printing.
for line in reader {
    print(">" + line.trimmingCharacters(in: .whitespacesAndNewlines))      
}

github上的仓库


5

Swift 4.2安全语法

/// Reads a text file one line at a time through the C `getline` function.
class LineReader {

    /// Path of the file being read.
    let path: String

    /// Open C stream backing this reader.
    private let file: UnsafeMutablePointer<FILE>

    /// Opens the file at `path` for reading; fails when it cannot be opened.
    init?(path: String) {
        guard let stream = fopen(path, "r") else { return nil }
        self.path = path
        self.file = stream
    }

    /// The next line as delivered by `getline`, or nil when nothing more can be read.
    var nextLine: String? {
        var buffer: UnsafeMutablePointer<CChar>? = nil
        var capacity = 0
        let bytesRead = getline(&buffer, &capacity, file)
        // Release the buffer getline allocated, whatever the outcome.
        defer { free(buffer) }

        if bytesRead > 0, let text = buffer {
            return String(cString: text)
        }
        return nil
    }

    deinit {
        fclose(file)
    }
}

/// Lets a LineReader be used wherever a Sequence of lines is expected.
extension LineReader: Sequence {
    func makeIterator() -> AnyIterator<String> {
        return AnyIterator<String> { self.nextLine }
    }
}

用法:

// The initializer is failable: bail out when the file cannot be opened.
guard let reader = LineReader(path: "/Path/to/file.txt") else {
    return
}
// forEach is available because LineReader conforms to Sequence.
reader.forEach { line in
    print(line.trimmingCharacters(in: .whitespacesAndNewlines))      
}

4

我来晚了,但这是我为此目的写的一个小类。经过几次不同的尝试(例如尝试继承NSInputStream),我发现这是一种合理而简单的方法。

记住要#import <stdio.h>在您的桥接标题中。

// Use it like this (the initializer takes a `path:` label and the method is `readline()`):
let readLine = ReadLine(path: somePath)
while let line = readLine.readline() {
    // do something...
}

/// Reads a file one line at a time with the C `getline` function.
class ReadLine {

    // Line buffer owned by getline(): it starts out null with capacity 0 so that
    // getline() allocates it, and later grows it, with malloc/realloc.
    private var buf: UnsafeMutablePointer<Int8> = nil
    private var n: Int = 0

    let path: String
    let mode: String = "r"

    // Opened on first use. Swift strings are passed to fopen() directly; the
    // temporary C strings only need to live for the duration of the call.
    // The pointer is null when the file cannot be opened.
    private lazy var filepointer: UnsafeMutablePointer<FILE> = {
        return fopen(self.path, self.mode)
    }()

    init(path: String) {
        self.path = path
    }

    /// Returns the next line (including its line terminator, as delivered by
    /// getline), or nil at end of file or when the file could not be opened.
    func readline() -> String? {
        if filepointer == nil {
            return nil
        }

        if getline(&buf, &n, filepointer) > 0 {
            return String.fromCString(buf)
        }

        return nil
    }

    deinit {
        // The buffer came from getline(), so it is released with free().
        free(buf)
        if filepointer != nil {
            fclose(filepointer)
        }
    }
}

我喜欢这个,但仍有改进空间。没有必要用withCString创建指针(这样做实际上并不安全),直接调用return fopen(self.path, self.mode)即可。您还可以添加一项检查,确认文件确实能够打开;目前如果打不开,readline()会崩溃。也不需要转换为UnsafePointer<CChar>。最后,您的用法示例无法编译。
Martin R

4

此函数采用文件URL并返回一个序列,该序列将返回文件的每一行,并懒惰地读取它们。它可以与Swift 5一起使用。它依赖于底层getline

/// Iteration state threaded through the sequence returned by `lines(ofFile:)`.
typealias LineState = (
  // pointer to a C string representing a line
  linePtr:UnsafeMutablePointer<CChar>?,
  // capacity of the buffer behind linePtr, maintained by getline
  linecap:Int,
  // open C stream, or nil when the file could not be opened or is already closed
  filePtr:UnsafeMutablePointer<FILE>?
)

/// Returns a sequence which iterates through all lines of the the file at the URL.
///
/// Each element is the line as delivered by `getline`, i.e. including its line
/// terminator. If the file cannot be opened the sequence is empty.
///
/// - Parameter url: file URL of a file to read
/// - Returns: a Sequence which lazily iterates through lines of the file
///
/// - warning: the caller of this function **must** iterate through all lines of the file, since aborting iteration midway will leak memory and a file pointer
/// - precondition: the file must be UTF8-encoded (which includes, ASCII-encoded)
func lines(ofFile url:URL) -> UnfoldSequence<String,LineState>
{
  let initialState:LineState = (linePtr:nil, linecap:0, filePtr:fopen(url.path,"r"))
  return sequence(state: initialState, next: { (state) -> String? in
    // fopen failed (or the stream was already closed): nothing to read, and
    // neither getline nor fclose may be handed a NULL stream.
    guard let file = state.filePtr else { return nil }

    if getline(&state.linePtr, &state.linecap, file) > 0,
      let theLine = state.linePtr  {
      return String.init(cString:theLine)
    }

    // End of file or read error: release the getline buffer and the stream,
    // and clear the state so nothing can be freed or closed twice.
    free(state.linePtr)
    state.linePtr = nil
    state.linecap = 0
    fclose(file)
    state.filePtr = nil
    return nil
  })
}

因此,例如,这是您使用它来打印应用程序捆绑包中名为“ foo”的文件的每一行的方式:

// Force-unwrapped on purpose: a missing bundled resource is a programmer error.
let url = Bundle.main.url(forResource: "foo", withExtension: nil)!
for line in lines(ofFile:url) {
  // suppress print's automatically inserted line ending, since
  // each line returned by lines(ofFile:) keeps its own new line character.
  print(line, separator: "", terminator: "")
}

通过修改Alex Brown的答案以消除Martin R的评论中提到的内存泄漏,并将其更新为Swift 5,我开发了此答案。


2

尝试答案,或阅读《 Mac OS Stream编程指南》

您可能会发现使用stringWithContentsOfURL,实际上性能会更好,因为使用基于内存(或内存映射)的数据比基于磁盘的数据更快。

在另一个线程上执行它也有详细记录,例如此处

更新资料

如果您不想一次读取全部内容,又不想使用NSStreams,那么您可能不得不使用C级文件I/O。不这样做的理由有很多——阻塞、字符编码、I/O错误处理、速度,这里仅举几例——而这些正是Foundation库要解决的问题。我在下面草拟了一个简单的答案,它只处理ASCII数据:

/// Sketch of a line reader built on C stdio that reads the file one byte at a time.
class StreamReader {

    /// Set once fread() fails to deliver a byte (end of file or error).
    var eofReached = false
    /// The C stream returned by fopen().
    // NOTE(review): the result of fopen() is not checked before fread()/fclose() use it.
    let fileHandle: UnsafePointer<FILE>

    init (path: String) {
        self.fileHandle = fopen(path.bridgeToObjectiveC().UTF8String, "rb".bridgeToObjectiveC().UTF8String)
    }

    deinit {
        fclose(self.fileHandle)
    }

    /// Returns the characters read up to the next CR or LF byte (the terminator is not
    /// included), or whatever was accumulated when end of file is hit.
    // NOTE(review): CR and LF each end a line on their own, so a CR LF pair yields an
    // extra empty string; the call that hits end of file also returns a string
    // (possibly empty), so callers see one trailing result.
    func nextLine() -> String {
        var nextChar: UInt8 = 0
        var stringSoFar = ""
        var eolReached = false
        while (self.eofReached == false) && (eolReached == false) {
            // Read exactly one byte; anything other than a count of 1 is treated as EOF/error.
            if fread(&nextChar, 1, 1, self.fileHandle) == 1 {
                switch nextChar & 0xFF {
                case 13, 10 : // CR, LF
                    eolReached = true
                case 0...127 : // Keep it in ASCII
                    stringSoFar += NSString(bytes:&nextChar, length:1, encoding: NSASCIIStringEncoding)
                default :
                    // Non-ASCII bytes are rendered as their numeric value in angle brackets.
                    stringSoFar += "<\(nextChar)>"
                }
            } else { // EOF or error
                self.eofReached = true
            }
        }
        return stringSoFar
    }
}

// OP's original request follows:
// The "~" in the path is expanded by stringByStandardizingPath before the file is opened.
var aStreamReader = StreamReader(path: "~/Desktop/Test.text".stringByStandardizingPath)

while aStreamReader.eofReached == false { // Changed property name for more accurate meaning
    let currentline = aStreamReader.nextLine()
    //list.addItem(currentline)
    println(currentline)
}

我对此建议表示赞赏,但我正在Swift中专门寻找代码。另外,我想一次只处理一行,而不是一次处理所有行。
马特

那么,您是否要处理一行内容然后释放它并阅读下一行内容?我将认为在内存中使用它会更快。是否需要按顺序处理它们?如果没有,您可以使用枚举块来极大地加快阵列的处理速度。
macshome 2014年

我想一次抓取许多行,但不一定需要加载所有行。至于井然有序,这并不关键,但这会有所帮助。
马特

如果将扩展case 0...127为非ASCII字符会怎样?
马特

1
好吧,这实际上取决于文件中使用的字符编码。如果它们是Unicode的多种格式之一,则需要对此进行编码;如果它们是许多Unicode之前的PC“代码页”系统之一,则需要对其进行解码。Foundation库为您完成所有这些工作,这是您自己要做的许多工作。
Grimxn 2014年

2

事实证明,一旦掌握了UnsafePointer,老式的C API在Swift中用起来也很顺手。下面是一个简单的cat程序,它从stdin读取并逐行打印到stdout。您甚至不需要Foundation,只用Darwin就足够了:

import Darwin
// Size in bytes of the line buffer handed to fgets().
let bufsize = 4096
// let stdin = fdopen(STDIN_FILENO, "r") it is now predefined in Darwin
var buf = UnsafePointer<Int8>.alloc(bufsize)
// fgets() reads up to and including the next newline (or EOF); the loop ends
// when it returns a null pointer.
while fgets(buf, Int32(bufsize-1), stdin) {
    print(String.fromCString(CString(buf)))
}
// NOTE(review): destroy() presumably only tears down the pointee; the alloc() above
// likely needs a matching dealloc(bufsize) to release the memory — confirm.
buf.destroy()

1
这根本不是“按行”处理。它只是把输入数据原样搬运到输出,并不区分普通字符和行尾字符。输出的行确实与输入相同,但那只是因为换行符也被一并搬运过去了。
亚历克斯·布朗

3
@AlexBrown:事实并非如此。fgets()读取最多(包括)换行符(或EOF)的字符。还是我误会了您的评论?
Martin R

@Martin R,请问这在Swift 4/5中看起来如何?我需要这么简单的内容才能逐行读取文件–
gbenroscience

1

或者您可以简单地使用Generator

// Generator that yields one line of stdin per step (terminator included), nil at EOF.
let stdinByLine = GeneratorOf({ () -> String? in
    // Starting from a null pointer with capacity 0 makes getline() allocate the buffer.
    var input = UnsafeMutablePointer<Int8>(), lim = 0
    let line: String? = getline(&input, &lim, stdin) > 0 ? String.fromCString(input) : nil
    // The bytes have been copied into the String, so the getline() buffer must be
    // released here; otherwise every step leaks one buffer.
    free(input)
    return line
})

让我们尝试一下

// Echo every line read from stdin, prefixed with ">>> ".
for line in stdinByLine {
    println(">>> \(line)")
}

它简单、惰性求值,并且易于与Swift的其他特性(例如枚举器,以及map、reduce、filter等函数式操作)链式组合,只需使用lazy()包装器即可。


它可以推广到任意FILE:

// Builds a generator that yields one line of `file` per step (terminator included), nil at EOF.
let byLine = { (file:UnsafeMutablePointer<FILE>) in
    GeneratorOf({ () -> String? in
        // Starting from a null pointer with capacity 0 makes getline() allocate the buffer.
        var input = UnsafeMutablePointer<Int8>(), lim = 0
        let line: String? = getline(&input, &lim, file) > 0 ? String.fromCString(input) : nil
        // The bytes have been copied into the String, so the getline() buffer must be
        // released here; otherwise every step leaks one buffer.
        free(input)
        return line
    })
}

调用方式如下:

for line in byLine(stdin) { ... }

非常感谢那个现已被删除的答案,它给了我getline的代码!
亚历克斯·布朗

1
显然,我完全忽略了编码。留给读者练习。
亚历克斯·布朗

请注意,由于getline()为数据分配了缓冲区,因此代码会泄漏内存。
马丁R

1

(注意:我在Mac OS Sierra 10.12.3的Xcode 8.2.1上使用Swift 3.0.1)

我在这里看到的所有答案都错过了他可能正在寻找LF或CRLF。如果一切顺利,他/她可以在LF上进行匹配,并在返回的字符串末尾检查额外的CR。但是一般查询涉及多个搜索字符串。换句话说,定界符必须为Set<String>,其中集合既不为空也不包含空字符串,而不是单个字符串。

在去年的第一次尝试中,我尝试做“正确的事”并搜索通用的字符串集。太难了;您需要功能强大的解析器和状态机等。我放弃了它,并且放弃了这个项目。

现在,我再次执行该项目,并再次面临同样的挑战。现在,我将对CR和LF进行硬编码搜索。我认为没有人需要在CR / LF解析之外搜索两个这样的半独立和半独立字符。

我正在使用由提供的搜索方法 Data,因此在这里我不进行字符串编码和其他操作。只是原始的二进制处理。假设我在这里有一个ASCII超集,例如ISO Latin-1或UTF-8。您可以在下一层处理字符串编码,然后判断附加了辅助代码点的CR / LF仍然算作CR还是LF。

算法:从当前字节偏移量开始,搜索下一个CR和下一个LF。

  • 如果两者均未找到,则考虑下一个数据字符串为从当前偏移量到数据结尾的位置。请注意,终止符长度为0。将此标记为读取循环的结尾。
  • 如果首先找到一个LF,或者仅找到一个LF,则考虑下一个数据串是从当前偏移量到LF。请注意,终止符长度为1。将偏移量移到LF之后。
  • 如果仅找到CR,请像LF情况一样(只是使用不同的字节值)。
  • 否则,我们得到一个CR,然后是LF。
    • 如果两者相邻,则处理方式类似于LF情况,但终结器的长度为2。
    • 如果它们之间有一个字节,并且所说的字节也是CR,那么我们得到“ Windows开发人员在文本模式下写了一个二进制\ r \ n,结果是\ r \ r \ n”。像LF一样处理它,除了终止符长度为3。
    • 否则,CR和LF不会连接,并且只能像CR一样处理。

这是一些代码:

/// Walks over a `Data` value and reports where each line lies, recognising
/// LF, CR, CR-LF and CR-CR-LF as line terminators.
///
/// Offsets are expressed in the index space of `data`, so the iterator also
/// works on `Data` slices whose `startIndex` is not zero.
struct DataInternetLineIterator: IteratorProtocol {

    /// Descriptor of the location of a line
    ///
    /// - offset: index of the line's first byte
    /// - length: number of bytes in the line, terminator included
    /// - terminatorLength: number of terminator bytes (0 for an unterminated last line)
    typealias LineLocation = (offset: Int, length: Int, terminatorLength: Int)

    /// Carriage return.
    static let cr: UInt8 = 13
    /// Carriage return as data.
    static let crData = Data(repeating: cr, count: 1)
    /// Line feed.
    static let lf: UInt8 = 10
    /// Line feed as data.
    static let lfData = Data(repeating: lf, count: 1)

    /// The data to traverse.
    let data: Data
    /// The byte offset to search from for the next line.
    private var lineStartOffset: Int

    /// Initialize with the data to read over.
    init(data: Data) {
        self.data = data
        // A slice keeps the indices of its parent, so start at startIndex rather than 0.
        self.lineStartOffset = data.startIndex
    }

    /// Returns the location of the next line, or nil once all bytes are consumed.
    mutating func next() -> LineLocation? {
        let lineStart = lineStartOffset
        guard lineStart < data.endIndex else { return nil }

        let searchRange = lineStart..<data.endIndex
        let nextCR = data.range(of: DataInternetLineIterator.crData, options: [], in: searchRange)?.lowerBound
        let nextLF = data.range(of: DataInternetLineIterator.lfData, options: [], in: searchRange)?.lowerBound

        let lineEndOffset: Int
        let terminatorLength: Int
        switch (nextCR, nextLF) {
        case (nil, nil):
            // No terminator left: the line runs to the end of the data.
            lineEndOffset = data.endIndex
            terminatorLength = 0
        case (nil, let lfIndex?):
            lineEndOffset = lfIndex
            terminatorLength = 1
        case (let crIndex?, nil):
            lineEndOffset = crIndex
            terminatorLength = 1
        case (let crIndex?, let lfIndex?) where lfIndex < crIndex:
            // The LF comes first, so the CR belongs to a later line.
            lineEndOffset = lfIndex
            terminatorLength = 1
        case (let crIndex?, let lfIndex?):
            lineEndOffset = crIndex
            switch lfIndex - crIndex {
            case 1:
                terminatorLength = 2  // CRLF
            case 2 where data[crIndex + 1] == DataInternetLineIterator.cr:
                terminatorLength = 3  // CR-CRLF
            default:
                terminatorLength = 1  // CR-only; the LF is unrelated
            }
        }

        lineStartOffset = lineEndOffset + terminatorLength
        return (offset: lineStart, length: lineStartOffset - lineStart, terminatorLength: terminatorLength)
    }

}

当然,如果Data块的大小达到千兆字节的相当一部分,那么一旦从当前字节偏移量起不再存在CR或LF,性能就会受到影响:每次迭代都会徒劳地一直搜索到数据末尾。分块读取数据会有所帮助:

/// Hands out consecutive fixed-size blocks of a `Data` value; only the last
/// block may be shorter than `blockSize`.
///
/// Offsets are expressed in the index space of `data`, so the iterator also
/// works on `Data` slices whose `startIndex` is not zero.
struct DataBlockIterator: IteratorProtocol {

    /// The data to traverse.
    let data: Data
    /// The offset into the data to read the next block from.
    private(set) var blockOffset: Int
    /// The number of bytes remaining.  Kept so the last block is the right size if it's short.
    private(set) var bytesRemaining: Int
    /// The size of each block (except possibly the last).
    let blockSize: Int

    /// Initialize with the data to read over and the chunk size.
    ///
    /// - Precondition: `blockSize` is greater than zero.
    init(data: Data, blockSize: Int) {
        precondition(blockSize > 0)

        self.data = data
        // A slice keeps the indices of its parent, so start at startIndex rather than 0.
        self.blockOffset = data.startIndex
        self.bytesRemaining = data.count
        self.blockSize = blockSize
    }

    /// Returns the next block, or nil once every byte has been handed out.
    mutating func next() -> Data? {
        guard bytesRemaining > 0 else { return nil }

        // Advance by the bytes actually returned, so that after a short final block
        // blockOffset stops at the end of the data and bytesRemaining stops at zero.
        let length = min(bytesRemaining, blockSize)
        let block = data.subdata(in: blockOffset..<(blockOffset + length))
        blockOffset += length
        bytesRemaining -= length
        return block
    }

}

您必须自己将这些想法混合在一起,因为我还没有完成。考虑:

  • 当然,您必须考虑完全包含在块中的行。
  • 但是,当一行的末端位于相邻的块中时,您必须处理。
  • 或者端点之间至少有一个块
  • 最大的麻烦是,当该行以一个多字节序列结尾时,但是所说的序列跨越了两个块!(以CR结尾的行也是该块中的最后一个字节,这是一种等效情况,因为您需要读取下一个块以查看just-CR实际上是CRLF还是CR-CRLF。当块以CR-CR结尾。)
  • 而且,您需要处理当前偏移量中没有更多终止符的情况,但是数据结尾位于后面的块中。

祝好运!


1

跟随@dankogai的回答,我对Swift 4+进行了一些修改,

    // Print the file at jsonURL line by line using C stdio.
    let bufsize = 4096
    // fopen returns nil when the file cannot be opened; fgets must not get a NULL stream.
    if let fp = fopen(jsonURL.path, "r") {
        defer { fclose(fp) }

        let buf = UnsafeMutablePointer<CChar>.allocate(capacity: bufsize)
        defer { buf.deallocate() }

        // fgets stores at most bufsize - 1 characters plus the terminating NUL, and
        // keeps the line's newline. Lines longer than the buffer arrive in pieces.
        while fgets(buf, Int32(bufsize), fp) != nil {
            // No extra terminator: the text already carries its own newlines, and
            // pieces of an over-long line must not be split by additional ones.
            print(String(cString: buf), terminator: "")
        }
    }

这对我有用。

谢谢


0

我想要一个不会连续修改缓冲区或重复代码的版本,因为两者效率低下,并且允许任何大小的缓冲区(包括1个字节)和任何定界符。它有一种公共方法:readline()。调用此方法将返回下一行的String值,或者在EOF返回nil。

import Foundation

// LineStream(): path: String, [buffSize: Int], [delim: String] -> nil | String
// ============= --------------------------------------------------------------
// path:     the path to a text file to be parsed
// buffSize: an optional buffer size, (1...); default is 4096
// delim:    an optional delimiter String; default is "\n"
// encoding: an optional string encoding; default is NSUTF8StringEncoding
//
// NOTE: the delimiter is searched for inside one buffer fill at a time, so a
// multi-byte delimiter that straddles two fills is not recognised.
// ***************************************************************************
class LineStream {
    let path: String
    let handle: NSFileHandle!

    let delim: NSData!
    let encoding: NSStringEncoding

    var buffer = NSData()
    var buffSize: Int

    var buffIndex = 0
    var buffEndIndex = 0

    init?(path: String,
      buffSize: Int = 4096,
      delim: String = "\n",
      encoding: NSStringEncoding = NSUTF8StringEncoding)
    {
      self.handle = NSFileHandle(forReadingAtPath: path)
      self.path = path
      // Buffer sizes below 1 are clamped to 1
      self.buffSize = buffSize < 1 ? 1 : buffSize
      self.encoding = encoding
      self.delim = delim.dataUsingEncoding(encoding)
      if handle == nil || self.delim == nil {
        print("ERROR initializing LineStream") /* TODO use STDERR */
        return nil
      }
    }

    // PRIVATE
    // fillBuffer(): _ -> Int [0...buffSize]
    // ============= -------- ..............
    // Fill the buffer with new data; return with the buffer size, or zero
    // upon reaching end-of-file
    // *********************************************************************
    private func fillBuffer() -> Int {
        buffer = handle.readDataOfLength(buffSize)
        buffIndex = 0
        buffEndIndex = buffer.length

        return buffEndIndex
    }

    // PRIVATE
    // delimLocation(): _ -> Int? nil | [0..<buffSize]
    // ================ --------- ....................
    // Search the remaining buffer for a delimiter; return with the location
    // of a delimiter in the buffer, or nil if one is not found.
    // ***********************************************************************
    private func delimLocation() -> Int? {
        let searchRange = NSMakeRange(buffIndex, buffEndIndex - buffIndex)
        let rangeToDelim = buffer.rangeOfData(delim,
                                              options: [], range: searchRange)
        return rangeToDelim.location == NSNotFound
            ? nil
            : rangeToDelim.location
    }

    // PRIVATE
    // dataStrValue(): NSData -> String ("" | String)
    // =============== ---------------- .............
    // Attempt to convert data into a String value using the supplied encoding;
    // return the String value or empty string if the conversion fails.
    // ***********************************************************************
    private func dataStrValue(data: NSData) -> String? {
        if let strVal = NSString(data: data, encoding: encoding) as? String {
            return strVal
        } else { return "" }
    }

    // PUBLIC
    // readLine(): _ -> String? nil | String
    // =========== ____________ ............
    // Read the next line of the file, i.e., up to the next delimiter or end-of-
    // file, whichever occurs first; return the String value of the data found,
    // or nil upon reaching end-of-file. A final line that is not terminated by
    // the delimiter is returned like any other line.
    // *************************************************************************
    func readLine() -> String? {
        guard let line = NSMutableData(capacity: buffSize) else {
            print("ERROR setting line")
            exit(EXIT_FAILURE)
        }

        // Loop until a delimiter is found, or end-of-file is reached
        var delimFound = false
        while !delimFound {
            // buffIndex will equal buffEndIndex in three situations, resulting
            // in a (re)filling of the buffer:
            //   1. Upon the initial call;
            //   2. If a search for a delimiter has failed
            //   3. If a delimiter is found at the end of the buffer
            if buffIndex == buffEndIndex {
                if fillBuffer() == 0 {
                    // End of file. Anything collected so far is the last,
                    // unterminated line and must not be dropped; only an
                    // empty line buffer means there is nothing left.
                    return line.length > 0 ? dataStrValue(line) : nil
                }
            }

            let lengthToDelim: Int
            let startIndex = buffIndex

            // Find a length of data to place into the line buffer to be
            // returned; reset buffIndex
            if let delimIndex = delimLocation() {
                // SOME VALUE when a delimiter is found; append that amount of
                // data onto the line buffer,and then return the line buffer
                delimFound = true
                lengthToDelim = delimIndex - buffIndex
                // Skip the whole delimiter, which may be longer than one byte.
                // Landing on buffEndIndex triggers a refill on the next call.
                buffIndex = delimIndex + delim.length
            } else {
                // NIL if no delimiter left in the buffer; append the rest of
                // the buffer onto the line buffer, refill the buffer, and
                // continue looking
                lengthToDelim = buffEndIndex - buffIndex
                buffIndex = buffEndIndex    // will trigger a refill of buffer
                                            // on the next loop
            }

            line.appendData(buffer.subdataWithRange(
                NSMakeRange(startIndex, lengthToDelim)))
        }

        return dataStrValue(line)
    }
}

它被称为如下:

// The initializer is failable; terminate when the stream cannot be set up.
guard let myStream = LineStream(path: "/path/to/file.txt")
else { exit(EXIT_FAILURE) }

// readLine() returns nil at end of file, which ends the loop.
while let s = myStream.readLine() {
  print(s)
}
By using our site, you acknowledge that you have read and understand our Cookie Policy and Privacy Policy.
Licensed under cc by-sa 3.0 with attribution required.