diff --git a/fileutil/file.go b/fileutil/file.go index 0cd4531..18f2b68 100644 --- a/fileutil/file.go +++ b/fileutil/file.go @@ -25,6 +25,61 @@ import ( "github.com/duke-git/lancet/v2/validator" ) +// FileReader is a reader supporting offset seeking and reading one +// line at a time, this is especially useful for large files +type FileReader struct { + *bufio.Reader + file *os.File + offset int64 +} + +// NewFileReader creates the FileReader struct for reading +func NewFileReader(path string) (*FileReader, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + return &FileReader{ + file: f, + Reader: bufio.NewReader(f), + offset: 0, + }, nil +} + +// ReadLine reads and returns one line at a time excluding the trailing '\r' and '\n' +func (f *FileReader) ReadLine() (string, error) { + data, err := f.Reader.ReadBytes('\n') + f.offset += int64(len(data)) + if err == nil || err == io.EOF { + for len(data) > 0 && (data[len(data)-1] == '\r' || data[len(data)-1] == '\n') { + data = data[:len(data)-1] + } + return string(data), err + } + return "", err +} + +// Offset returns the current offset of the file +func (f *FileReader) Offset() int64 { + return f.offset +} + +// Seek sets the current offset of the reading +func (f *FileReader) Seek(offset int64) error { + _, err := f.file.Seek(offset, 0) + if err != nil { + return err + } + f.Reader = bufio.NewReader(f.file) + f.offset = offset + return nil +} + +// Close takes care of the opened file +func (f *FileReader) Close() error { + return f.file.Close() +} + // IsExist checks if a file or directory exists. // Play: https://go.dev/play/p/nKKXt8ZQbmh func IsExist(path string) bool { @@ -508,6 +563,25 @@ func FileSize(path string) (int64, error) { return f.Size(), nil } +// DirSize walks the folder recursively and returns folder size in bytes. +func DirSize(path string) (int64, error) { + var size int64 + err := filepath.WalkDir(path, func(_ string, d os.DirEntry, err error) error { + if err != nil { + return err + } + if !d.IsDir() { + info, err := d.Info() + if err != nil { + return err + } + size += info.Size() + } + return err + }) + return size, err +} + // MTime returns file modified time. // Play: https://go.dev/play/p/s_Tl7lZoAaY func MTime(filepath string) (int64, error) { diff --git a/fileutil/file_test.go b/fileutil/file_test.go index fde1f63..5bf13df 100644 --- a/fileutil/file_test.go +++ b/fileutil/file_test.go @@ -476,3 +476,42 @@ Disallow: /deny ` internal.NewAssert(t, "TestReadFile").Equal(want, string(dat)) } + +func TestReadlineFile(t *testing.T) { + path := "./testdata/demo.csv" + reader, err := NewFileReader(path) + if err != nil { + t.Fail() + } + defer reader.Close() + + indexMap := make(map[string]int64) + defer reader.Close() + for { + offset := reader.Offset() + line, err := reader.ReadLine() + if err == io.EOF { + break + } + indexMap[line] = offset + } + + lines, err := ReadFileByLine(path) + if err != nil { + t.Fail() + } + for _, line := range lines { + offset, ok := indexMap[line] + if !ok { + t.Fail() + } + if err = reader.Seek(offset); err != nil { + t.Fail() + } + lineRead, err := reader.ReadLine() + if err == io.EOF { + break + } + internal.NewAssert(t, "TestReadlineFile").Equal(line, lineRead) + } +}