import pandas as pd
import numpy as np
from typing import List, Dict, Union, Optional
import plotly.graph_objects as go
# Validate that the QC test flag columns contain only the values 0, 1, or 2
def validate_flags(df: pd.DataFrame,
                   flag_columns: Optional[List[str]] = None,
                   allowed_values: Optional[List[int]] = None) -> Dict[str, List]:
    """
    Checks specified DataFrame columns for values outside of the allowed set,
    including checking for NaN (missing) values.

    This is typically used for quality control (QC) flag columns which should
    only contain specific integer values (like 0, 1, 2).

    Parameters
    ----------
    df : pd.DataFrame
        The input DataFrame containing the flag columns.
    flag_columns : List[str], optional
        Column names (or name prefixes) to check. Every DataFrame column whose
        name starts with one of these entries is validated, so suffixed
        variants such as 'FC_SSITC_TEST_1_1_1' are picked up as well.
        Defaults to the five SSITC test columns ('FC', 'LE', 'ET', 'H', 'TAU').
    allowed_values : List[int], optional
        The list of values considered valid (defaults to [0, 1, 2]).

    Returns
    -------
    Dict[str, List]
        A dictionary where keys are the column names that failed validation,
        and values are a list of the unique, invalid values found in that column,
        including the string "NaN" if missing values are present.
    """
    # Defaults are created per call so no list object is shared between calls.
    if flag_columns is None:
        flag_columns = ['FC_SSITC_TEST', 'LE_SSITC_TEST', 'ET_SSITC_TEST',
                        'H_SSITC_TEST', 'TAU_SSITC_TEST']
    if allowed_values is None:
        allowed_values = [0, 1, 2]

    # Convert allowed_values to a set for faster lookup
    allowed_set = set(allowed_values)
    # Dictionary to store results for columns that fail the validation
    invalid_columns: Dict[str, List] = {}

    print("--- Starting Validation ---")
    print(f"Checking columns: {flag_columns}")
    print(f"Allowed values: {allowed_set}")

    # Honour the caller-supplied names: str.startswith accepts a tuple, so a
    # single call tests every requested prefix.
    prefixes = tuple(flag_columns)
    columns_to_check = [
        col for col in df.columns
        if isinstance(col, str) and col.startswith(prefixes)
    ]

    # Report requested names that match nothing in the DataFrame.
    for base in flag_columns:
        if not any(col.startswith(base) for col in columns_to_check):
            print(f"Warning: Column '{base}' not found in DataFrame.")

    for col in columns_to_check:
        # 1. Find all unique values in the series, including NaNs
        unique_values = df[col].unique()

        # 2. Separate NaNs from invalid (non-allowed) values
        invalid_flags = []
        nan_present = False
        for val in unique_values:
            if pd.isna(val):
                nan_present = True
            elif val not in allowed_set:
                invalid_flags.append(val)

        # 3. Construct the final report list (values first, then "NaN" indicator)
        try:
            final_report_list = sorted(invalid_flags)
        except TypeError:
            # Mixed, non-comparable types (e.g. str and int): order by text form.
            final_report_list = sorted(invalid_flags, key=str)
        if nan_present:
            final_report_list.append("NaN")

        if final_report_list:
            invalid_columns[col] = final_report_list
            print(f"FAIL: Column '{col}' contains unexpected values: {final_report_list}")
        else:
            print(f"PASS: Column '{col}' contains only valid values.")

    print("--- Validation Complete ---")
    return invalid_columns
# Compare field names between the DataFrame and the AmeriFlux variable names
def compare_names_to_ameriflux(
    df_full: pd.DataFrame,
    amflux: Union[pd.DataFrame, pd.Series]
) -> pd.DataFrame:
    """
    Cleans column names of df_full by removing '_1', '_2', '_3', and '_4'
    qualifier suffixes, compares the cleaned names against an 'amflux' variable
    list, and returns a DataFrame of the results, along with printing the
    unmatched columns.

    A name is cut at the first underscore-delimited token that is exactly
    '1', '2', '3' or '4' (e.g. 'TS_2_1_1' -> 'TS'). Tokens that merely start
    with one of those digits are left alone (e.g. 'WS_10M' stays 'WS_10M').

    Args:
        df_full: The DataFrame whose columns need to be cleaned and matched.
        amflux: A DataFrame or Series that contains the 'Variable' column
            or is the Series of variables to match against.

    Returns:
        A DataFrame containing the original columns ('all_columns'), the
        cleaned columns ('clean_columns'), and a boolean ('is_in_amflux')
        indicating if the cleaned column is in the amflux list.

    Raises:
        ValueError: If amflux is neither a Series nor a DataFrame with a
            'Variable' column.
    """
    qualifier_tokens = {'1', '2', '3', '4'}

    def _strip_qualifiers(name):
        # Non-string labels cannot carry a textual suffix; pass them through.
        if not isinstance(name, str):
            return name
        tokens = name.split('_')
        # Position 0 has no preceding underscore, so it can never be a suffix.
        for pos in range(1, len(tokens)):
            if tokens[pos] in qualifier_tokens:
                return '_'.join(tokens[:pos])
        return name

    # 1. Column Cleaning Logic
    clean_columns = [_strip_qualifiers(item) for item in df_full.columns]
    clean_columns_series = pd.Series(clean_columns)

    # 2. Determine the AmeriFlux variable list for matching
    # Handle both Series and DataFrame inputs for amflux
    if isinstance(amflux, pd.DataFrame) and 'Variable' in amflux.columns:
        amflux_variables = amflux['Variable']
    elif isinstance(amflux, pd.Series):
        amflux_variables = amflux
    else:
        raise ValueError("The 'amflux' argument must be a pandas Series or a DataFrame with a 'Variable' column.")

    # 3. Matching
    is_in_amflux = clean_columns_series.isin(amflux_variables)

    # 4. Create Results DataFrame
    results_df = pd.DataFrame({
        'all_columns': df_full.columns,
        'clean_columns': clean_columns,
        'is_in_amflux': is_in_amflux
    })

    # 5. Print and Return
    unmatched_df = results_df[~results_df['is_in_amflux']].sort_values('clean_columns')
    print('COLUMNS NOT IN AMERIFLUX VARIABLE LIST\n')
    print(unmatched_df)
    return results_df
# Compare alignment between two sources: a raw file read from disk and a micromet DataFrame
def compare_to_raw(raw_file_path, micromet_df, test_var='NETRAD', threshold=0.1):
    '''Compares a specific variable between a raw data file and a micromet DataFrame.

    The function reads a 'raw' DAT or CSV file from the provided path, merges it
    with the 'micromet' DataFrame by matching the raw TIMESTAMP field to the
    micromet DATETIME_END field (inner join), and calculates the absolute
    difference for a specified variable (`test_var`) between the two sources.
    It returns only the rows where this absolute difference is greater than the
    given `threshold`.

    Both value columns are converted with ``pd.to_numeric(errors='coerce')``
    before subtraction, so non-numeric entries become NaN and are never
    reported as differences.

    Args:
        raw_file_path (str): The file path to the raw data CSV file. The file is
            read with the column names on its second line and the two lines
            that follow the column names skipped.
        micromet_df (pd.DataFrame): DataFrame containing the micrometeorological
            data, including 'DATETIME_END' and the `test_var` column.
        test_var (str, optional): The variable to compare. Defaults to 'NETRAD'.
            The raw column is expected to be named '{test_var}_1_1_1' and the
            micromet column '{test_var}'.
        threshold (float, optional): The absolute difference threshold. Rows where
            |raw_value - micromet_value| > threshold are returned.
            Defaults to 0.1.

    Returns:
        pd.DataFrame: A DataFrame containing 'DATETIME_END' and the values of
            `test_var` from both sources ('{test_var}_1_1_1' and '{test_var}')
            for all rows where the absolute difference exceeds the `threshold`.

    Raises:
        KeyError: If either value column is absent after the merge.
    '''
    raw_col = f'{test_var}_1_1_1'
    micromet_col = f'{test_var}'

    raw = pd.read_csv(raw_file_path, skiprows=[2, 3], header=1, low_memory=False)
    raw['TIMESTAMP'] = pd.to_datetime(raw['TIMESTAMP'])

    combo = raw.merge(
        micromet_df,
        how='inner',
        left_on='TIMESTAMP',
        right_on='DATETIME_END',
        suffixes=('_raw', '_micromet'),
    )

    # Fail with a message that names the columns instead of a bare label.
    missing = [name for name in (raw_col, micromet_col) if name not in combo.columns]
    if missing:
        raise KeyError(
            f"Column(s) {missing} not found after merging raw and micromet data."
        )

    # Coerce both sides so text placeholders do not break the subtraction.
    raw_values = pd.to_numeric(combo[raw_col], errors='coerce')
    micromet_values = pd.to_numeric(combo[micromet_col], errors='coerce')
    exceeds_threshold = (raw_values - micromet_values).abs() > threshold

    value_differences = combo.loc[exceeds_threshold, ['DATETIME_END', raw_col, micromet_col]]
    return value_differences
# check for consistency between DATETIME_END and TIMESTAMP_END fields
[docs]
def validate_timestamp_consistency(df: pd.DataFrame) -> pd.DataFrame:
    """
    Checks for consistency between a standardized datetime column (DATETIME_END)
    and a string/integer/float timestamp column (TIMESTAMP_END) formatted as
    YYYYMMDDHHMM.

    Parameters
    ----------
    df : pd.DataFrame
        The input DataFrame containing the columns to check. It is not
        modified; the function works on a copy.

    Returns
    -------
    pd.DataFrame
        A DataFrame containing only the rows where the DATETIME_END and
        the converted TIMESTAMP_END columns do not match, along with both
        original columns and their parsed counterparts ('DATETIME_END_DT',
        'TIMESTAMP_END_DT') for inspection. Rows where either value could not
        be parsed are ignored. Returns an empty DataFrame if all rows match or
        if a required column is missing.
    """
    df = df.copy()
    REQUIRED_COLS = ['DATETIME_END', 'TIMESTAMP_END']

    if not all(col in df.columns for col in REQUIRED_COLS):
        print(f"Error: DataFrame must contain both {REQUIRED_COLS} columns.")
        return pd.DataFrame()

    print("\n--- Starting Timestamp Consistency Validation ---")

    # Ensure DATETIME_END is properly parsed datetime object
    df['DATETIME_END_DT'] = pd.to_datetime(df['DATETIME_END'], errors='coerce')

    # Convert TIMESTAMP_END (e.g., 202406241430) to a datetime object.
    # Going through str handles int and string inputs; the trailing '.0' left
    # by float columns is stripped so those values parse instead of being
    # coerced to NaT and silently skipped.
    timestamp_text = df['TIMESTAMP_END'].astype(str).str.replace(r'\.0$', '', regex=True)
    df['TIMESTAMP_END_DT'] = pd.to_datetime(
        timestamp_text,
        format='%Y%m%d%H%M',
        errors='coerce'
    )

    # Compare the two generated datetime columns.
    # .notna() ignores rows where either conversion failed (coerced to NaT).
    both_parsed = df['DATETIME_END_DT'].notna() & df['TIMESTAMP_END_DT'].notna()
    mismatch_mask = (df['DATETIME_END_DT'] != df['TIMESTAMP_END_DT']) & both_parsed

    # Filter for mismatches and report
    report_columns = REQUIRED_COLS + ['DATETIME_END_DT', 'TIMESTAMP_END_DT']
    mismatch_report = df.loc[mismatch_mask, report_columns].copy()

    if mismatch_report.empty:
        print("PASS: DATETIME_END and TIMESTAMP_END are perfectly consistent (where both are valid).")
    else:
        print(f"FAIL: Found {len(mismatch_report)} inconsistent rows.")

    print("--- Timestamp Consistency Validation Complete ---")
    return mismatch_report
# Find extended periods of time where sensor read 0 values (used with precip data)
[docs]
def find_zero_chunks(
    df: pd.DataFrame,
    var_name: str,
    days_threshold: int,
    aggregation_method: str = 'sum',
    tolerance: float = 1e-6
) -> pd.DataFrame:
    """
    Identifies continuous chunks of time where a variable is effectively zero or NaN,
    treating NaNs as part of the zero gap.

    The function first resamples the data to daily ('D') frequency using the
    specified aggregation method before checking for long zero periods.

    Args:
        df: The pandas DataFrame with a DatetimeIndex (any frequency).
        var_name: The name of the column to check for zero values.
        days_threshold: The minimum number of consecutive days required to be
            identified as a "long zero chunk".
        aggregation_method: The method used to aggregate the data to daily.
            Options: 'sum' (default) or 'max'.
        tolerance: Daily values whose absolute value is below this are treated
            as zero.

    Returns:
        A DataFrame listing the 'Start Day', 'End Day', and 'Duration (Days)'
        for each identified long zero chunk, in chronological order. When the
        input has no rows or no chunk qualifies, an empty DataFrame carrying
        those three columns is returned. On invalid arguments or a resampling
        error a message is printed and a plain empty DataFrame is returned.
    """
    result_columns = ['Start Day', 'End Day', 'Duration (Days)']

    if not isinstance(df.index, pd.DatetimeIndex):
        print("Error: DataFrame index must be a DatetimeIndex.")
        return pd.DataFrame()

    if aggregation_method not in ['sum', 'max']:
        print(f"Error: Invalid aggregation_method '{aggregation_method}'. Must be 'sum' or 'max'.")
        return pd.DataFrame()

    # Nothing to scan; avoids positional lookups on an empty daily series.
    if len(df.index) == 0:
        return pd.DataFrame(columns=result_columns)

    # 1. Resample the data to daily frequency ('D') using the specified method
    try:
        df_daily = df[var_name].resample('D').agg(aggregation_method)
    except Exception as e:
        print(f"Error during resampling with method '{aggregation_method}': {e}")
        return pd.DataFrame()

    if df_daily.empty:
        return pd.DataFrame(columns=result_columns)

    # 2. Boolean mask: True if the daily aggregated value is near zero OR is NaN
    is_zero_or_na = (df_daily.abs() < tolerance) | df_daily.isna()

    # 3. Label each run of zero days. The cumulative count of non-zero days
    #    only changes when a non-zero day occurs, so consecutive zero days
    #    share one label and separate runs get different labels.
    run_label = (~is_zero_or_na).cumsum()
    zero_days = is_zero_or_na[is_zero_or_na]

    # 4. Read start, end and length of every run directly from the daily index.
    chunks: List[Dict[str, Union[pd.Timestamp, float]]] = []
    for _, run in zero_days.groupby(run_label[is_zero_or_na]):
        run_length = len(run)
        if run_length >= days_threshold:
            chunks.append({
                'Start Day': run.index[0].normalize(),
                'End Day': run.index[-1].normalize(),
                'Duration (Days)': float(run_length)
            })

    return pd.DataFrame(chunks, columns=result_columns)
# preps two dataframes to run the compare function
[docs]
def prep_for_comparison(df1: pd.DataFrame, df2: pd.DataFrame) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Trim two DataFrames down to the labels they have in common.

    Only columns present in both frames and index labels present in both
    frames are kept, so the two results cover the same rows and columns.

    Args:
        df1: The first pandas DataFrame.
        df2: The second pandas DataFrame.

    Returns:
        A tuple (df1_prep, df2_prep) holding the selections of df1 and df2
        restricted to the shared index labels and shared columns.
    """
    shared_columns = df1.columns.intersection(df2.columns)
    shared_rows = df1.index.intersection(df2.index)

    # Apply the identical row/column selection to each input in turn.
    trimmed_first, trimmed_second = (
        frame.loc[shared_rows, shared_columns] for frame in (df1, df2)
    )
    return trimmed_first, trimmed_second
# Check differences between DataFrames column by column, ignoring NaN values
def data_diff_check(df1, df2):
    """
    Calculates the percent of non-null fields that differ between two
    dataframes, for all column pairs with identical names.

    Each column is evaluated independently: rows are dropped only where that
    particular column is null in either dataframe, and only index labels
    present in both dataframes are compared.

    Note: It can be helpful to round the dataframes first if you only
    want to note larger differences

    Parameters
    ----------
    df1 : DataFrame with a DatetimeIndex
    df2 : DataFrame with a DatetimeIndex

    Returns
    -------
    pd.DataFrame
        Dataframe with column names as index and percent (not proportion!)
        of values that differ in that column between dataframes, in a column
        named 'percent_different'. A column with no comparable rows is
        reported as -9999. The result is empty when the dataframes share no
        column names.
    """
    common_cols = df1.columns.intersection(df2.columns)
    percent_diff = {}

    for col in common_cols:
        # Work on per-column Series so that nulls in one column never remove
        # rows from the comparison of another column.
        left = df1[col].dropna()
        right = df2[col].dropna()

        shared_index = left.index.intersection(right.index)
        left = left.loc[shared_index]
        right = right.loc[shared_index]

        if len(left) > 0:
            differing = left.compare(right)
            percent_diff[col] = len(differing) / len(left) * 100
        else:
            # Sentinel kept from the original API: nothing to compare.
            percent_diff[col] = -9999

    result_df = pd.DataFrame.from_dict(
        percent_diff,
        orient='index',
        columns=['percent_different']
    )
    return result_df
# determine the optimal lag between two series
[docs]
def review_lags(data1, data2, max_lag=4):
    """
    Calculates the Cross-Correlation Function (CCF) to find the optimal time
    lag between two time series.

    The optimal lag is the shift that results in the maximum absolute
    correlation between the two series. A short summary is printed.

    Parameters
    ----------
    data1 : pd.Series
        The primary time series. Must have a DatetimeIndex and be the same
        frequency as data2.
    data2 : pd.Series
        The secondary time series, which is shifted (lagged) relative to data1.
        Must have a DatetimeIndex and be the same frequency as data1.
    max_lag : int
        The maximum number of periods (in both positive and negative directions)
        to test for the lag. The function tests lags from -max_lag to +max_lag.

    Returns
    -------
    tuple
        ``(cross_correlations, optimal_lag, max_correlation_value)`` where
        ``cross_correlations`` is the list of correlation coefficients for
        lags -max_lag..+max_lag (in that order), ``optimal_lag`` is the lag
        with the largest absolute correlation, and ``max_correlation_value``
        is the (signed) correlation at that lag. If no correlation could be
        computed, or an error occurred, ``optimal_lag`` is None and
        ``max_correlation_value`` is NaN.

    Notes
    -----
    - **Lag Interpretation:** each lag k correlates ``data1`` with
      ``data2.shift(k)``.
        - A **positive lag** (k > 0) means **data2 leads data1** by k periods.
        - A **negative lag** (k < 0) means **data1 leads data2** by |k| periods.
    - **Missing Data:** Shifting `data2` introduces NAs at the start/end,
      which reduces the number of paired observations for that lag.
    """
    # Defined up front so the return statement is valid on every path.
    cross_correlations = []
    optimal_lag = None
    max_correlation_value = np.nan

    try:
        lags = np.arange(-max_lag, max_lag + 1)
        for lag in lags:
            cross_correlations.append(data1.corr(data2.shift(lag)))

        ccf_series = pd.Series(cross_correlations, index=lags, dtype=float)

        if ccf_series.notna().any():
            optimal_lag = ccf_series.abs().idxmax()
            max_correlation_value = ccf_series[optimal_lag]

            # Printing the results for quick review
            if optimal_lag == 0 and max_correlation_value.round(3) == 1:
                print("Data close to identical")
            else:
                print(f"Optimal Lag: {optimal_lag} periods")
                print(f"Max Cross-Correlation: {max_correlation_value.round(3)}")
        else:
            print("No valid correlation could be computed for any lag.")
    except Exception as E:
        # Best-effort helper: report the problem and return the defaults.
        print(E)

    return cross_correlations, optimal_lag, max_correlation_value
# performs more timeseries validation steps
[docs]
def validate_timeseries_data(df: pd.DataFrame, interval_minutes: int, date_format: str = '%Y%m%d%H%M') -> Dict[str, Union[bool, str]]:
    """
    Performs several validation checks on a time-series DataFrame with a DatetimeIndex.

    The START/END columns are converted via astype(str) plus a regex cleanup so
    that numeric data (like floats ending in .0) parses correctly. The input
    DataFrame is not modified.

    Args:
        df: The input DataFrame, expected to have a DatetimeIndex and columns
            named 'TIMESTAMP_START' and 'TIMESTAMP_END' containing datetime-like data.
        interval_minutes: The expected interval between index entries (e.g., 30 or 60).
        date_format: The format string for converting string/numeric dates (default is '%Y%m%d%H%M').

    Returns:
        A dictionary summarizing the results of the three validation checks.
        For each of 'index_interval', 'end_match' and 'duration' there is a
        boolean '<name>_check' entry and a text '<name>_status' entry. If the
        required columns are missing or cannot be parsed, the dictionary
        instead holds 'error': True and a 'message'.
    """
    results = {}

    # 0. Check that the required columns exist
    if 'TIMESTAMP_END' not in df.columns or 'TIMESTAMP_START' not in df.columns:
        return {
            'error': True,
            'message': "Required columns 'TIMESTAMP_START' and 'TIMESTAMP_END' not found. Please ensure your DataFrame columns are named exactly 'TIMESTAMP_START' and 'TIMESTAMP_END'."
        }

    try:
        # Conversion handles original data being string, int, or float (like 202406190000.0):
        # 1. astype(str): converts any numeric type to string.
        # 2. str.replace: removes the trailing '.0' left by float conversions.
        # 3. pd.to_datetime: converts the clean string to a datetime.
        # The parsed values are kept in local Series so the caller's
        # DataFrame does not gain helper columns.
        end_dt = pd.to_datetime(
            df['TIMESTAMP_END'].astype(str).str.replace(r'\.0$', '', regex=True),
            format=date_format,
            errors='coerce'
        )
        start_dt = pd.to_datetime(
            df['TIMESTAMP_START'].astype(str).str.replace(r'\.0$', '', regex=True),
            format=date_format,
            errors='coerce'
        )
    except Exception as e:
        return {
            'error': True,
            'message': f"Data conversion failed. Check your 'TIMESTAMP_START'/'TIMESTAMP_END' data and 'date_format' argument. Error: {e}"
        }

    # Check for NaT (Not a Time) values resulting from failed conversions
    if end_dt.isna().any() or start_dt.isna().any():
        return {
            'error': True,
            'message': "Data conversion resulted in NaT values (unparsable dates). Check your input data consistency."
        }

    # 1. Both the index interval and the START-END duration must match this Timedelta
    interval_td = pd.Timedelta(minutes=interval_minutes)

    # --- CHECK 1: Index Interval Validation ---
    index_diff = df.index.to_series().diff().dropna()
    if index_diff.empty:
        is_index_consistent = True
        index_status = "Index consistency check skipped (1 or 0 rows)."
    else:
        is_index_consistent = bool((index_diff == interval_td).all())
        if is_index_consistent:
            index_status = f"PASS: All index intervals are exactly {interval_minutes} minutes."
        else:
            bad_steps = index_diff[index_diff != interval_td]
            first_fail = bad_steps.iloc[0]
            fail_index = bad_steps.index[0]
            index_status = f"FAIL: Index interval is inconsistent. First discrepancy ends at {fail_index}: Found {first_fail} (Expected {interval_td})."
    results['index_interval_check'] = is_index_consistent
    results['index_interval_status'] = index_status

    # --- CHECK 2: End Value Match ---
    # Does the parsed TIMESTAMP_END value match the index?
    is_end_matching_index = bool((end_dt == df.index).all())
    if is_end_matching_index:
        end_status = "PASS: All 'TIMESTAMP_END' values exactly match the DatetimeIndex."
    else:
        # Positional mask, so duplicate index labels cannot affect the lookup.
        end_mismatch = (end_dt != df.index).to_numpy()
        mismatch_label = df.index[end_mismatch][0]
        mismatch_value = df['TIMESTAMP_END'][end_mismatch].iloc[0]
        end_status = f"FAIL: First mismatch at index {mismatch_label}. TIMESTAMP_END={mismatch_value}, Index={mismatch_label}. (Comparison failed due to mismatched time or type)."
    results['end_match_check'] = is_end_matching_index
    results['end_match_status'] = end_status

    # --- CHECK 3: Start-End Difference Validation ---
    # Is the START value exactly 'interval_minutes' before the END value?
    duration = end_dt - start_dt
    is_duration_consistent = bool((duration == interval_td).all())
    if is_duration_consistent:
        duration_status = f"PASS: All TIMESTAMP_START-TIMESTAMP_END durations are exactly {interval_minutes} minutes."
    else:
        bad_durations = duration[duration != interval_td]
        mismatch_index = bad_durations.index[0]
        first_fail = bad_durations.iloc[0]
        duration_status = f"FAIL: Duration inconsistent. First mismatch at index {mismatch_index}: Found {first_fail} (Expected {interval_minutes} minutes)."
    results['duration_check'] = is_duration_consistent
    results['duration_status'] = duration_status

    return results
# evaluate offset in time series data in rolling sections
[docs]
def detect_sectional_offsets_indexed(
    df1, df2, value_col1, value_col2,
    freq='h', max_lag=24, window_size='7D'
):
    """
    Evaluates time offsets between two time series data frames (datetime-indexed)
    in consecutive sections. Returns the lag with the highest correlation for
    each time window.

    Parameters:
    - df1, df2: DataFrames with datetime index.
    - value_col1: name of the column with numerical values to compare for df1
    - value_col2: name of the column with numerical values to compare for df2
    - freq: resampling frequency (e.g., 'h' for hourly); values are averaged.
    - max_lag: maximum lag (in rows of the resampled, NaN-free series) to test.
      Lags from -max_lag to +max_lag are evaluated.
    - window_size: time window for sectional comparison (e.g., '7D' or '12H').

    Returns:
    - DataFrame with one row per evaluated window and the columns
      'window_start', 'best_lag' and 'correlation'. Windows with fewer than
      2 * max_lag points, or with no computable correlation, are skipped.
      When nothing can be evaluated the DataFrame is empty but still carries
      those columns.
    """
    result_columns = ['window_start', 'best_lag', 'correlation']

    # Resample both series to ensure regular intervals
    s1 = df1[value_col1].resample(freq).mean()
    s2 = df2[value_col2].resample(freq).mean()

    # Keep only timestamps present in BOTH resampled series.
    s1, s2 = s1.align(s2, join='inner')

    # Drop rows where either series is NaN rather than filling over gaps.
    combined = pd.DataFrame({'s1': s1, 's2': s2}).dropna()
    s1 = combined['s1']
    s2 = combined['s2']

    # Check if any data remains after cleaning
    if len(s1) == 0:
        return pd.DataFrame(columns=result_columns)

    # Create window start times
    window_starts = pd.date_range(s1.index.min(), s1.index.max(), freq=window_size)

    # Loop-invariant values, computed once.
    window_span = pd.to_timedelta(window_size)
    lags = np.arange(-max_lag, max_lag + 1)
    min_points = max_lag * 2

    results = []
    for start in window_starts:
        end = start + window_span

        # Label-based slice of the cleaned series for this window
        seg1 = s1.loc[start:end]
        seg2 = s2.loc[start:end]

        # Check for sufficient data points in the window
        if len(seg1) < min_points or len(seg2) < min_points:
            continue  # Skip short or empty windows

        # Shifting introduces NaNs at the segment edges; Series.corr only
        # uses the pairs where both values are present.
        correlations = [seg1.corr(seg2.shift(lag)) for lag in lags]
        if all(pd.isna(correlations)):
            continue

        best_position = int(np.nanargmax(correlations))
        results.append({
            'window_start': start,
            'best_lag': lags[best_position],
            'correlation': correlations[best_position]
        })

    return pd.DataFrame(results, columns=result_columns)
# plots the results of detect_sectional_offsets_indexed
[docs]
def plot_sectional_lags_plotly(corr_check, height=400):
    """
    Draw the per-window best lag from detect_sectional_offsets_indexed.

    `corr_check` must provide 'window_start' and 'best_lag' columns. The best
    lag is plotted against the window start time as a line with markers,
    together with a dashed horizontal reference line at zero lag spanning the
    first to the last window start. The figure is displayed with `fig.show()`;
    nothing is returned.

    `height` is the figure height passed to the plot layout.
    """
    window_times = corr_check['window_start']

    best_lag_trace = go.Scatter(
        x=window_times,
        y=corr_check['best_lag'],
        mode='lines+markers',
        name='Best Lag',
        line={'color': 'royalblue'},
        marker={'size': 6},
    )

    # Zero-lag reference line across the full time range
    zero_lag_trace = go.Scatter(
        x=[window_times.min(), window_times.max()],
        y=[0, 0],
        mode='lines',
        name='Zero Lag',
        line={'color': 'gray', 'dash': 'dash'},
    )

    fig = go.Figure(data=[best_lag_trace, zero_lag_trace])

    layout_options = {
        'title': 'Sectional Time Lag Detection',
        'xaxis_title': 'Window Start Time',
        'yaxis_title': 'Best Time Lag',
        'template': 'plotly_white',
        'hovermode': 'x unified',
        'height': height,
    }
    fig.update_layout(**layout_options)
    fig.show()