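Optimize CSV parsing by passing skipinitialspace=True to csv.reader and removing the per-cell strip() calls (note: skipinitialspace only skips spaces at the start of a field, so trailing whitespace in cells is no longer removed)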

This commit is contained in:
Олександр Гуранець 2026-02-24 22:11:37 +02:00 committed by hasslesstech
parent 722d45b2a1
commit b772848d4c

View File

@ -1,5 +1,3 @@
from __future__ import annotations
import csv import csv
import time import time
from datetime import datetime from datetime import datetime
@ -29,9 +27,6 @@ class FileDatasource:
self._acc_buf: Optional[List[str]] = None self._acc_buf: Optional[List[str]] = None
self._gps_buf: Optional[List[str]] = None self._gps_buf: Optional[List[str]] = None
self._acc_has_header: Optional[bool] = None
self._gps_has_header: Optional[bool] = None
def startReading(self, *args, **kwargs): def startReading(self, *args, **kwargs):
"""Must be called before read()""" """Must be called before read()"""
if self._started: if self._started:
@ -80,16 +75,17 @@ class FileDatasource:
self._acc_f = open(self.accelerometer_filename, "r", newline="", encoding="utf-8") self._acc_f = open(self.accelerometer_filename, "r", newline="", encoding="utf-8")
self._gps_f = open(self.gps_filename, "r", newline="", encoding="utf-8") self._gps_f = open(self.gps_filename, "r", newline="", encoding="utf-8")
self._acc_reader = csv.reader(self._acc_f) self._acc_reader = csv.reader(self._acc_f, skipinitialspace=True)
self._gps_reader = csv.reader(self._gps_f) self._gps_reader = csv.reader(self._gps_f, skipinitialspace=True)
self._acc_buf = None self._acc_buf = None
self._gps_buf = None self._gps_buf = None
self._acc_has_header, self._acc_buf = self._detect_header_and_buffer( # detect header / buffer first data row (we only need the buffered row)
_, self._acc_buf = self._detect_header_and_buffer(
self._acc_reader, expected_cols=3, header_tokens=("x", "y", "z") self._acc_reader, expected_cols=3, header_tokens=("x", "y", "z")
) )
self._gps_has_header, self._gps_buf = self._detect_header_and_buffer( _, self._gps_buf = self._detect_header_and_buffer(
self._gps_reader, expected_cols=2, header_tokens=("longitude", "latitude") self._gps_reader, expected_cols=2, header_tokens=("longitude", "latitude")
) )
@ -107,15 +103,13 @@ class FileDatasource:
self._gps_reader = None self._gps_reader = None
self._acc_buf = None self._acc_buf = None
self._gps_buf = None self._gps_buf = None
self._acc_has_header = None
self._gps_has_header = None
def _rewind_acc(self) -> None: def _rewind_acc(self) -> None:
if self._acc_f is None: if self._acc_f is None:
raise RuntimeError("Accelerometer file is not open.") raise RuntimeError("Accelerometer file is not open.")
self._acc_f.seek(0) self._acc_f.seek(0)
self._acc_reader = csv.reader(self._acc_f) self._acc_reader = csv.reader(self._acc_f, skipinitialspace=True)
self._acc_has_header, self._acc_buf = self._detect_header_and_buffer( _, self._acc_buf = self._detect_header_and_buffer(
self._acc_reader, expected_cols=3, header_tokens=("x", "y", "z") self._acc_reader, expected_cols=3, header_tokens=("x", "y", "z")
) )
@ -123,8 +117,8 @@ class FileDatasource:
if self._gps_f is None: if self._gps_f is None:
raise RuntimeError("GPS file is not open.") raise RuntimeError("GPS file is not open.")
self._gps_f.seek(0) self._gps_f.seek(0)
self._gps_reader = csv.reader(self._gps_f) self._gps_reader = csv.reader(self._gps_f, skipinitialspace=True)
self._gps_has_header, self._gps_buf = self._detect_header_and_buffer( _, self._gps_buf = self._detect_header_and_buffer(
self._gps_reader, expected_cols=2, header_tokens=("longitude", "latitude") self._gps_reader, expected_cols=2, header_tokens=("longitude", "latitude")
) )
@ -144,7 +138,6 @@ class FileDatasource:
self._rewind_acc() self._rewind_acc()
continue continue
row = [c.strip() for c in row]
if not row or not any(row): if not row or not any(row):
continue continue
@ -166,7 +159,6 @@ class FileDatasource:
self._rewind_gps() self._rewind_gps()
continue continue
row = [c.strip() for c in row]
if not row or not any(row): if not row or not any(row):
continue continue
@ -182,7 +174,6 @@ class FileDatasource:
first = next(rdr, None) first = next(rdr, None)
if first is None: if first is None:
return False, None return False, None
first = [c.strip() for c in first]
if first and any(first): if first and any(first):
break break
@ -209,9 +200,7 @@ class FileDatasource:
y = int(row[1]) y = int(row[1])
z = int(row[2]) z = int(row[2])
except ValueError as e: except ValueError as e:
raise ValueError( raise ValueError(f"Invalid accelerometer values (expected integers): {row}") from e
f"Invalid accelerometer values (expected integers): {row}"
) from e
return Accelerometer(x=x, y=y, z=z) return Accelerometer(x=x, y=y, z=z)