1
0
mirror of https://github.com/duke-git/lancet.git synced 2026-02-04 12:52:28 +08:00

feat: add support for seeking and reading one line at a time from a file (#158)

* feat: add support for seeking and reading one line at a time from a file

* feat: add support for calculating folder total size

---------

Co-authored-by: Suacrbah <5744580+Suacrbah@user.noreply.gitee.com>
This commit is contained in:
suacrbah
2024-01-01 16:50:54 +08:00
committed by GitHub
parent 1b1b10d0ee
commit 565f2893b9
2 changed files with 113 additions and 0 deletions

View File

@@ -25,6 +25,61 @@ import (
"github.com/duke-git/lancet/v2/validator"
)
// FileReader is a reader supporting offset seeking and reading one
// line at a time, which is especially useful for large files.
//
// The embedded *bufio.Reader is exported, so its read methods can be called
// directly, but only ReadLine and Seek update the position reported by Offset.
type FileReader struct {
	*bufio.Reader
	file   *os.File // underlying file handle; released by Close
	offset int64    // byte position tracked by ReadLine and Seek
}

// NewFileReader opens the file at path for reading and returns a FileReader
// whose offset starts at 0. It returns the error from os.Open on failure.
// The caller is responsible for calling Close on the returned reader.
func NewFileReader(path string) (*FileReader, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	return &FileReader{
		Reader: bufio.NewReader(f),
		file:   f,
	}, nil
}

// ReadLine reads and returns one line at a time excluding the trailing '\r' and '\n'.
//
// When the end of the file is reached the returned error is io.EOF; the
// returned string then holds whatever was read after the last '\n' (possibly
// nothing). Any other error is returned together with an empty string.
func (f *FileReader) ReadLine() (string, error) {
	data, err := f.Reader.ReadBytes('\n')
	// Count every consumed byte, line terminators included, so that the value
	// reported by Offset can later be passed to Seek to re-read from here.
	f.offset += int64(len(data))
	if err != nil && err != io.EOF {
		return "", err
	}

	end := len(data)
	for end > 0 && (data[end-1] == '\r' || data[end-1] == '\n') {
		end--
	}
	return string(data[:end]), err
}

// Offset returns the current offset of the file, in bytes, as tracked by
// ReadLine and Seek.
func (f *FileReader) Offset() int64 {
	return f.offset
}

// Seek sets the current offset of the reading, measured in bytes from the
// start of the file. On failure the error from the underlying file seek is
// returned and the reader's buffer and offset are left untouched.
func (f *FileReader) Seek(offset int64) error {
	if _, err := f.file.Seek(offset, io.SeekStart); err != nil {
		return err
	}

	// Bytes buffered from the previous position are stale now. Reset drops
	// them while reusing the existing buffer instead of allocating a new one
	// on every Seek.
	if f.Reader == nil {
		f.Reader = bufio.NewReader(f.file)
	} else {
		f.Reader.Reset(f.file)
	}
	f.offset = offset
	return nil
}

// Close takes care of the opened file and returns the error, if any, from
// closing it.
func (f *FileReader) Close() error {
	return f.file.Close()
}
// IsExist checks if a file or directory exists.
// Play: https://go.dev/play/p/nKKXt8ZQbmh
func IsExist(path string) bool {
@@ -508,6 +563,25 @@ func FileSize(path string) (int64, error) {
return f.Size(), nil
}
// DirSize walks the tree rooted at path with filepath.WalkDir and returns the
// summed size, in bytes, of every entry that is not a directory.
//
// The walk stops at the first error, whether reported by WalkDir itself or by
// fetching an entry's file info. That error is returned together with the
// total accumulated up to that point.
func DirSize(path string) (int64, error) {
	var total int64

	err := filepath.WalkDir(path, func(_ string, entry os.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		// Directories only structure the walk; they add nothing to the total.
		if entry.IsDir() {
			return nil
		}

		info, infoErr := entry.Info()
		if infoErr != nil {
			return infoErr
		}
		total += info.Size()
		return nil
	})

	return total, err
}
// MTime returns file modified time.
// Play: https://go.dev/play/p/s_Tl7lZoAaY
func MTime(filepath string) (int64, error) {

View File

@@ -476,3 +476,42 @@ Disallow: /deny
`
internal.NewAssert(t, "TestReadFile").Equal(want, string(dat))
}
func TestReadlineFile(t *testing.T) {
path := "./testdata/demo.csv"
reader, err := NewFileReader(path)
if err != nil {
t.Fail()
}
defer reader.Close()
indexMap := make(map[string]int64)
defer reader.Close()
for {
offset := reader.Offset()
line, err := reader.ReadLine()
if err == io.EOF {
break
}
indexMap[line] = offset
}
lines, err := ReadFileByLine(path)
if err != nil {
t.Fail()
}
for _, line := range lines {
offset, ok := indexMap[line]
if !ok {
t.Fail()
}
if err = reader.Seek(offset); err != nil {
t.Fail()
}
lineRead, err := reader.ReadLine()
if err == io.EOF {
break
}
internal.NewAssert(t, "TestReadlineFile").Equal(line, lineRead)
}
}