FileReader #5

Merged
bacant150 merged 15 commits from lab1_huranets into lab1 2026-02-26 10:04:10 +02:00
2 changed files with 217 additions and 29 deletions
Showing only changes of commit c974ac32f6 - Show all commits

View File

@@ -1,5 +1,11 @@
from csv import reader
from __future__ import annotations
hasslesstech commented 2026-02-24 19:37:02 +02:00 (Migrated from github.com)
Review

Naming convention is misleading, suggests parser specifically takes 16-bit integers. This parser should read any integer value and should just be int(row[0]), not a separate function call

Several identical issues occur in the next few lines

Naming convention is misleading, suggests parser specifically takes 16-bit integers. This parser should read any integer value and should just be `int(row[0])`, not a separate function call Several identical issues occur in the next few lines
hasslesstech commented 2026-02-24 19:37:45 +02:00 (Migrated from github.com)
Review

Unused variables, seem to be leftovers from previously removed INT16 boundary checking

Unused variables, seem to be leftovers from previously removed INT16 boundary checking
bacant150 commented 2026-02-24 20:01:55 +02:00 (Migrated from github.com)
Review

removed unnecessary variables

removed unnecessary variables
hasslesstech commented 2026-02-24 20:04:13 +02:00 (Migrated from github.com)
Review

Method is not used and does not provide additional functionality compared to default int() function, so should be removed

Method is not used and does not provide additional functionality compared to default `int()` function, so should be removed
bacant150 commented 2026-02-24 20:23:35 +02:00 (Migrated from github.com)
Review

Fixed: removed int16-specific naming/helpers and now parse accelerometer values directly using int(row[i]) without any bit-width assumptions. Pushed the update to the PR.

Fixed: removed int16-specific naming/helpers and now parse accelerometer values directly using int(row[i]) without any bit-width assumptions. Pushed the update to the PR.
bacant150 commented 2026-02-24 20:24:06 +02:00 (Migrated from github.com)
Review

Removed the unused _parse_int helper (it didn’t add any functionality over built-in int()). Pushed the update to the PR.

Removed the unused _parse_int helper (it didn’t add any functionality over built-in int()). Pushed the update to the PR.
VladiusVostokus commented 2026-02-25 11:17:41 +02:00 (Migrated from github.com)
Review

And after chage of rewinders, you can remove _acc_buf to hold first row

And after chage of rewinders, you can remove _acc_buf to hold first row
VladiusVostokus commented 2026-02-25 11:24:43 +02:00 (Migrated from github.com)
Review

What does this means?
if assuming this is a header with indexies of domain objects maybe make sense.

What does this means? if assuming this is a header with indexies of domain objects maybe make sense.
bacant150 commented 2026-02-25 12:14:42 +02:00 (Migrated from github.com)
Review

Removed unnecessary buffers like _acc_buf and _gps_buf.

Removed unnecessary buffers like _acc_buf and _gps_buf.
bacant150 commented 2026-02-25 12:53:57 +02:00 (Migrated from github.com)
Review

I initially tried removing this part, but the testing failed without it

I initially tried removing this part, but the testing failed without it
import csv
import time
from datetime import datetime
from pathlib import Path
from typing import Optional, List
from domain.accelerometer import Accelerometer
from domain.gps import Gps
from domain.aggregated_data import AggregatedData
@@ -7,24 +13,213 @@ import config
class FileDatasource:
def __init__(
self,
accelerometer_filename: str,
gps_filename: str,
) -> None:
pass
def read(self) -> AggregatedData:
"""Метод повертає дані отримані з датчиків"""
return AggregatedData(
Accelerometer(1, 2, 3),
Gps(4, 5),
datetime.now(),
config.USER_ID,
)
def __init__(self, accelerometer_filename: str, gps_filename: str) -> None:
self.accelerometer_filename = accelerometer_filename
self.gps_filename = gps_filename
self._acc_f = None
self._gps_f = None
self._acc_reader: Optional[csv.reader] = None
self._gps_reader: Optional[csv.reader] = None
self._started = False
# one-row buffers (supports CSVs with or without header)
self._acc_buf: Optional[List[str]] = None
self._gps_buf: Optional[List[str]] = None
self._acc_has_header: Optional[bool] = None
self._gps_has_header: Optional[bool] = None
def startReading(self, *args, **kwargs):
"""Метод повинен викликатись перед початком читання даних"""
"""Must be called before read()"""
if self._started:
return
if not Path(self.accelerometer_filename).exists():
raise FileNotFoundError(f"Accelerometer file not found: {self.accelerometer_filename}")
if not Path(self.gps_filename).exists():
raise FileNotFoundError(f"GPS file not found: {self.gps_filename}")
self._open_files()
self._started = True
def stopReading(self, *args, **kwargs):
"""Метод повинен викликатись для закінчення читання даних"""
"""Must be called when finishing reading"""
self._close_files()
self._started = False
def read(self) -> AggregatedData:
"""Return one combined sample (acc + gps)."""
if not self._started:
raise RuntimeError("Datasource is not started. Call startReading() before read().")
acc_row = self._next_acc_row()
gps_row = self._next_gps_row()
acc = self._parse_acc(acc_row)
gps = self._parse_gps(gps_row)
# IMPORTANT: timing belongs to datasource (not MQTT / main.py)
if config.DELAY and config.DELAY > 0:
time.sleep(float(config.DELAY))
return AggregatedData(
accelerometer=acc,
gps=gps,
time=datetime.utcnow(),
user_id=config.USER_ID,
)
# ---------------- internal ----------------
VladiusVostokus commented 2026-02-25 11:20:05 +02:00 (Migrated from github.com)
Review

You just can not return this variable from _detect_header_and_buffer() if it is not used

You just can not return this variable from _detect_header_and_buffer() if it is not used
bacant150 commented 2026-02-25 12:36:28 +02:00 (Migrated from github.com)
Review

removed unnecessary variables from _detect_header_and_buffer() and fixed the _rewind_acc() and _rewind_gps() methods

removed unnecessary variables from _detect_header_and_buffer() and fixed the _rewind_acc() and _rewind_gps() methods
def _open_files(self) -> None:
self._close_files()
VladiusVostokus commented 2026-02-25 16:05:28 +02:00 (Migrated from github.com)
Review

if you open file, your read pointer is already 0, why to set it to 0 again?

if you open file, your read pointer is already 0, why to set it to 0 again?
bacant150 commented 2026-02-25 21:09:54 +02:00 (Migrated from github.com)
Review

I removed the unnecessary rewind from _open_files().

I removed the unnecessary rewind from _open_files().
self._acc_f = open(self.accelerometer_filename, "r", newline="", encoding="utf-8")
self._gps_f = open(self.gps_filename, "r", newline="", encoding="utf-8")
self._acc_reader = csv.reader(self._acc_f)
self._gps_reader = csv.reader(self._gps_f)
self._acc_buf = None
self._gps_buf = None
self._acc_has_header, self._acc_buf = self._detect_header_and_buffer(
self._acc_reader, expected_cols=3, header_tokens=("x", "y", "z")
)
self._gps_has_header, self._gps_buf = self._detect_header_and_buffer(
self._gps_reader, expected_cols=2, header_tokens=("longitude", "latitude")
)
def _close_files(self) -> None:
for f in (self._acc_f, self._gps_f):
try:
if f is not None:
f.close()
except Exception:
pass
self._acc_f = None
VladiusVostokus commented 2026-02-25 16:06:38 +02:00 (Migrated from github.com)
Review

Why leave _ here if it's not used?

Why leave _ here if it's not used?
bacant150 commented 2026-02-25 21:13:00 +02:00 (Migrated from github.com)
Review

Good catch. The _detect_header_and_buffer helper wasn’t used and didn’t modify class state, so I removed it.

Good catch. The _detect_header_and_buffer helper wasn’t used and didn’t modify class state, so I removed it.
self._gps_f = None
self._acc_reader = None
self._gps_reader = None
self._acc_buf = None
self._gps_buf = None
self._acc_has_header = None
self._gps_has_header = None
VladiusVostokus commented 2026-02-25 11:14:51 +02:00 (Migrated from github.com)
Review

you can just skip first row after seek(0) knowing that this is a header, not actual value,
so you can write like
self._acc_f.seek(0)
self._acc_reader = csv.reader(self._acc_f)
next(self._acc_reader)
row = next(self.acc_reader)

you can just skip first row after seek(0) knowing that this is a header, not actual value, so you can write like self._acc_f.seek(0) self._acc_reader = csv.reader(self._acc_f) next(self._acc_reader) row = next(self.acc_reader)
VladiusVostokus commented 2026-02-25 11:15:57 +02:00 (Migrated from github.com)
Review

And do the same for other rewinders

And do the same for other rewinders
bacant150 commented 2026-02-25 12:02:34 +02:00 (Migrated from github.com)
Review

Refactoring file rewind logic to skip header line after seek(0) and for other rewinders

Refactoring file rewind logic to skip header line after seek(0) and for other rewinders
def _rewind_acc(self) -> None:
if self._acc_f is None:
VladiusVostokus commented 2026-02-25 15:52:46 +02:00 (Migrated from github.com)
Review

Why leave _ here if it's not used?
Maybe dont return anything from this method?

Why leave _ here if it's not used? Maybe dont return anything from this method?
VladiusVostokus commented 2026-02-25 16:00:51 +02:00 (Migrated from github.com)
Review

As I see this method doesn't modify any class fiels, so maybe just remove this method?

As I see this method doesn't modify any class fiels, so maybe just remove this method?
bacant150 commented 2026-02-25 20:57:57 +02:00 (Migrated from github.com)
Review

The _detect_header_and_buffer method was not modifying any class state and its return value was unused.
I removed the method and simplified the logic.

The _detect_header_and_buffer method was not modifying any class state and its return value was unused. I removed the method and simplified the logic.
raise RuntimeError("Accelerometer file is not open.")
self._acc_f.seek(0)
self._acc_reader = csv.reader(self._acc_f)
self._acc_has_header, self._acc_buf = self._detect_header_and_buffer(
self._acc_reader, expected_cols=3, header_tokens=("x", "y", "z")
)
def _rewind_gps(self) -> None:
if self._gps_f is None:
raise RuntimeError("GPS file is not open.")
self._gps_f.seek(0)
self._gps_reader = csv.reader(self._gps_f)
self._gps_has_header, self._gps_buf = self._detect_header_and_buffer(
self._gps_reader, expected_cols=2, header_tokens=("longitude", "latitude")
)
VladiusVostokus commented 2026-02-25 11:10:29 +02:00 (Migrated from github.com)
Review

This looks a bit comlex
Why itarate though loop to get 1 row?

This looks a bit comlex Why itarate though loop to get 1 row?
bacant150 commented 2026-02-25 11:53:08 +02:00 (Migrated from github.com)
Review

"Simplified the line reading logic by replacing the while True loop with a clearer check

"Simplified the line reading logic by replacing the while True loop with a clearer check
def _next_acc_row(self) -> List[str]:
if self._acc_reader is None:
raise RuntimeError("Accelerometer reader is not initialized.")
while True:
if self._acc_buf is not None:
row = self._acc_buf
self._acc_buf = None
else:
row = next(self._acc_reader, None)
if row is None:
# EOF -> rewind & continue
self._rewind_acc()
continue
row = [c.strip() for c in row]
if not row or not any(row):
continue
return row
def _next_gps_row(self) -> List[str]:
if self._gps_reader is None:
raise RuntimeError("GPS reader is not initialized.")
while True:
if self._gps_buf is not None:
row = self._gps_buf
self._gps_buf = None
else:
row = next(self._gps_reader, None)
if row is None:
# EOF -> rewind & continue
self._rewind_gps()
continue
row = [c.strip() for c in row]
if not row or not any(row):
continue
return row
@staticmethod
def _detect_header_and_buffer(
rdr: csv.reader, expected_cols: int, header_tokens: tuple[str, ...]
) -> tuple[bool, Optional[List[str]]]:
first = None
while True:
first = next(rdr, None)
if first is None:
return False, None
first = [c.strip() for c in first]
if first and any(first):
break
norm = [c.lower() for c in first]
# Header if it contains the expected tokens
if all(tok in norm for tok in header_tokens):
return True, None
# If first row is numeric-like and has enough columns => it's data (buffer it back)
if len(norm) >= expected_cols and all(FileDatasource._is_number(x) for x in norm[:expected_cols]):
return False, first
# Otherwise treat it as header-ish (skip it)
hasslesstech commented 2026-02-24 20:17:50 +02:00 (Migrated from github.com)
Review

Removal of checks is not neccesary, but is plausible since the source file should only contain correct data

Removal of checks is not neccesary, but is plausible since the source file *should* only contain correct data
bacant150 commented 2026-02-24 20:50:52 +02:00 (Migrated from github.com)
Review

Resumed checks

Resumed checks
return True, None
@staticmethod
hasslesstech commented 2026-02-24 15:25:56 +02:00 (Migrated from github.com)
Review

This value range check might break future integration with higher precision accelerometer data, hence should be removed

This value range check might break future integration with higher precision accelerometer data, hence should be removed
def _parse_acc(row: List[str]) -> Accelerometer:
if len(row) < 3:
raise ValueError(f"Accelerometer row must have 3 values (x,y,z). Got: {row}")
x = int(float(row[0]))
y = int(float(row[1]))
z = int(float(row[2]))
return Accelerometer(x=x, y=y, z=z)
@staticmethod
def _parse_gps(row: List[str]) -> Gps:
if len(row) < 2:
raise ValueError(f"GPS row must have 2 values (longitude,latitude). Got: {row}")
lon = float(row[0])
lat = float(row[1])
return Gps(longitude=lon, latitude=lat)
@staticmethod
def _is_number(s: str) -> bool:
try:
float(s)
return True
except Exception:
return False

View File

@@ -1,6 +1,4 @@
from paho.mqtt import client as mqtt_client
import json
import time
from schema.aggregated_data_schema import AggregatedDataSchema
from file_datasource import FileDatasource
import config
@@ -24,19 +22,14 @@ def connect_mqtt(broker, port):
return client
def publish(client, topic, datasource, delay):
def publish(client, topic, datasource):
datasource.startReading()
while True:
time.sleep(delay)
data = datasource.read()
msg = AggregatedDataSchema().dumps(data)
result = client.publish(topic, msg)
# result: [0, 1]
status = result[0]
if status == 0:
pass
# print(f"Send `{msg}` to topic `{topic}`")
else:
if status != 0:
print(f"Failed to send message to topic {topic}")
@@ -44,9 +37,9 @@ def run():
# Prepare mqtt client
client = connect_mqtt(config.MQTT_BROKER_HOST, config.MQTT_BROKER_PORT)
# Prepare datasource
datasource = FileDatasource("data/data.csv", "data/gps_data.csv")
datasource = FileDatasource("data/accelerometer.csv", "data/gps.csv")
# Infinity publish data
publish(client, config.MQTT_TOPIC, datasource, config.DELAY)
publish(client, config.MQTT_TOPIC, datasource)
if __name__ == "__main__":