Compute corrections to SIAM controller clock for corrections to instrument data that used this timestamp - Mike McCann 23 August 2018
The Python programs used to get data into formats to be read into this Notebook are in MBARI's CVS on moonjelly: http://moonjelly.shore.mbari.org/cgi-bin/cvsweb.cgi/DPforSSDS/py/ (they have also been copied to the CCE_Processed/Software/ssds directory). Note: 'BIN' was changed to 'SIN' in data file names in 2017, but there is still some version controlled software and data files that contain the 'bin' name. The SIAM controller logged the realtime clock in a file whose contents are converted to a .csv file that can be easily imported into a Pandas dataframe:
cd /u/ssdsadmin/dev/DPforSSDS/py
./bin_clocks_parse.py /mbari/CCE_Archive/SIN/20151013/SIAMLogs/clocks.log \
> /mbari/CCE_Processed/SIN/20151013/clocks/sin_clocks_20151013.csv
To execute this Notebook mount the cifs://atlas.shore.mbari.org/cce_processed share on your compter and cd to the SIN/20151013/clocks directory (e.g. on a Mac that would be cd /Volumes/CCE_Processed/SIN/20151013/clocks) and then execute the jupyter notebook command in a terminal window. This assumes that you have Anaconda Python 3.6 (or equivalent) installed on your computer.
Read the csv file into a Pandas data frame and show the first 10 records
import pandas as pd
deployment = '20151013'
df = pd.read_csv(f'sin_clocks_{deployment}.csv', parse_dates=True, index_col='realtimeclock_datetime')
df.head()
Plot over time the seconds difference between the real time clock and the system clock
%matplotlib inline
from bokeh.plotting import figure
from bokeh.io import output_notebook, show
from bokeh.resources import INLINE
output_notebook(resources=INLINE, hide_banner=True)
p = figure(title=f"SIN Controller Clock Time Difference with Real Time Clock for deployment {deployment}",
width=900, height=300,
x_axis_type='datetime',
x_axis_label='RTC Time (GMT)',
y_axis_label='seconds_difference')
p.line(df.index, df.seconds_difference, line_width=1)
p.cross(df.index, df.seconds_difference)
_ = show(p)
There are times where the SIN controller clock reset to the time of the Real Time Clock. In the cell below np.where() is used to find the the end points of a change in the slope, the obvious decreases are found with (sdiffs < -15). More subtle changes are found using the Box Zoom control in the above plot and by listing specific selections of the dataframe, e.g.
df.loc['2016-02-29 18:00:00':'2016-03-01 00:00:00']
One segment is flat, it ends on 2016-02-29 18:57:47 where df.realtimeclock_epoch_seconds==1456772267
import numpy as np
# We first shift back 1 period then take the difference in order to get the last point before the decrease
sdiffs = df.shift(periods=-1).seconds_difference.diff()
isdiffs = np.where((sdiffs < -15) |
(df.realtimeclock_epoch_seconds==1456772267))
# Append the last value in order to get the last segment
isdiffs = (np.append(isdiffs[0], [len(df)-1]),)
df.iloc[isdiffs]
Define function that returns slope and intercept of linear fit through data
import statsmodels.api as sm
def linear_fit(X, y):
'Return slope, intercept, and y_hat predicted values'
model = sm.OLS(y, sm.add_constant(X))
results = model.fit()
intercept, slope = results.params
intercept = round(intercept, 4)
slope = round(slope, 9)
y_hat = results.predict(sm.add_constant(X))
return slope, intercept, y_hat
Define some named tuples to help organize the correction information and reuse it in our processing scripts
from collections import namedtuple
ERange = namedtuple('ERange', 'start end')
Equation = namedtuple('Equation', 'slope intercept')
Segment = namedtuple('Segment', 'er, eq')
Loop through the segments of linearly increasing time difference and compute the slope and intercept for a linear fit
# Compute linear regressions for each segment and print out correction item for corrections dictionary
print(f"'{deployment}': [")
df_all_subsets = pd.DataFrame()
for i, (endtime, index) in enumerate(zip(df.iloc[isdiffs].index.values, isdiffs[0])):
if i == 0:
df_subset = df.loc[:endtime].copy()
else:
df_subset = df.loc[df.index.values[last_index+1]:endtime].copy()
X = df_subset.system_epoch_seconds
y = df_subset.realtimeclock_epoch_seconds
slope, intercept, y_hat = linear_fit(X, y)
df_subset['y_hat'] = y_hat # The prediction
df_subset['error'] = df_subset.realtimeclock_epoch_seconds - df_subset.y_hat
df_all_subsets = df_all_subsets.append(df_subset)
last_index = index
er = ERange(df_subset.iloc[0].system_epoch_seconds,
df_subset.loc[endtime].system_epoch_seconds)
eq = Equation(slope, intercept)
seg = Segment(er, eq)
print(f' {seg},')
print(']')
print(f"RMS Error = {np.sqrt(df_subset['error']**2).mean()}")
The result above is copied into the cce_data.py module so that the processing scripts aanderaaoxy_process.py, ctd_process.py, ecotriplet_process.py can convert the SIAM timestamps to a corrected time.
from bokeh.models import LinearAxis, Range1d
p = figure(title=f"Predicted Time with Real Time Clock Difference for deployment {deployment}",
width=900, height=300,
x_axis_type='datetime',
x_axis_label='RTC Time (GMT)',
y_range=(-2, 2),
y_axis_label='error of predicted time (seconds)')
p.extra_y_ranges = {"original": Range1d(start=0, end=1400)}
p.add_layout(LinearAxis(y_range_name="original"), 'right')
p.line(df_all_subsets.index, df_all_subsets.error, line_width=1, color="orange")
p.line(df.index, df.seconds_difference, y_range_name="original", line_width=1)
p.cross(df.index, df.seconds_difference, y_range_name="original")
_ = show(p)
The predicted SIAM time is mostly within 1.5 seconds of the realtime clock with an RMS error of 0.33 seconds.
# The correction for each segment as computed in sin_clocks_20151013.ipynb
corrections = {'20151013': [
Segment(er=ERange(start=1444502651, end=1444682663), eq=Equation(slope=0.999883593, intercept=168151.4752)),
Segment(er=ERange(start=1444685297, end=1455824492), eq=Equation(slope=0.999882777, intercept=169351.954)),
Segment(er=ERange(start=1455826790, end=1456772267), eq=Equation(slope=1.000000029, intercept=-41.9845)),
Segment(er=ERange(start=1456775867, end=1460674929), eq=Equation(slope=0.999882635, intercept=170973.9805)),
]}
Define a function to correct a SIAM system clock epoch second value so that it matches the realtime clock
def correct_esec(siam_es):
# None value returned for siam_es that falls between segments
corrected_es = None
for correction in corrections[deployment]:
if siam_es in range(*correction.er):
corrected_es = correction.eq.slope * siam_es + correction.eq.intercept
else:
next
return corrected_es
Compute time series of corrected SIAM clock times and plot its difference with the real time clock to verify the corrections
from bokeh.models import LinearAxis, Range1d
corrected_es_diff = []
bad_count = 0
for index, row in df.iterrows():
corrected_es = correct_esec(row.system_epoch_seconds)
if corrected_es:
corrected_es_diff.append(row.realtimeclock_epoch_seconds - corrected_es)
else:
#print(f'Ignoring data from siam_es={row.system_epoch_seconds}: No timestamp correction available')
bad_count += 1
print(f'{bad_count} of {len(df)} system_epoch_seconds fell outside of correction segments')
p = figure(title=f"Corrected Time with Real Time Clock Difference for deployment {deployment}",
width=900, height=300,
x_axis_type='datetime',
x_axis_label='RTC Time (GMT)',
y_range=(-2, 2),
y_axis_label='error after correction (seconds)')
p.extra_y_ranges = {"original": Range1d(start=0, end=1400)}
p.add_layout(LinearAxis(y_range_name="original"), 'right')
p.line(df_all_subsets.index, df_all_subsets.error, line_width=1, color="orange")
p.line(df.index, df.seconds_difference, y_range_name="original", line_width=1)
p.cross(df.index, df.seconds_difference, y_range_name="original")
_ = show(p)
It looks like our correct_esec() function and corrections dictionary item correctly corrects the system time.