Working with Transits and Averaging

Grabbing our libraries

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import lombscargle, find_peaks
plt.style.use('seaborn-v0_8-darkgrid')

Data Generation

PERIOD = 3600
DURATION = 300
START = 1000
DROP = 0.04
SAMPLES = 5000

#times = np.arange(0, 20000, 20)
times = np.random.uniform(0, 20000, size=SAMPLES)
signal = np.where(((times % PERIOD) > START) & ((times % PERIOD) < START + DURATION), 1-DROP, 1) + np.random.normal(0, 0.030, size=times.size)
#to_remove = np.random.randint(0, times.size-1, size=500)
#signal[to_remove] = np.nan

plt.plot(times, signal, '.')
plt.show()

Writing out to file:

df = pd.DataFrame({'time': times, 'signal': signal})
#df.to_csv('./transit_averaging.csv', index=False)

Lomb-Scargle

Spacing is random here, so using Lomb-Scargle:

period_max = 10000
period_min = 10
afreqs = 2 * np.pi * np.linspace(1/period_max, 1/period_min, 10000)
powers = lombscargle(times, signal, afreqs, precenter=True)

plt.plot(afreqs, powers)
plt.xlim(0,0.02)
plt.show()

Finding our peaks, and mapping aliases

peak_ids, peak_info = find_peaks(powers, height=0.015)

first = peak_ids[0]
ffreq = afreqs[first]

plt.plot(afreqs, powers)
plt.vlines(ffreq, peak_info['peak_heights'][0], 0.05, color='C2')
for n in range(2,15):
    plt.vlines(n*ffreq, 0, 0.05, color='C3', lw=2)
plt.xlim(0,0.02)
plt.show()

Period Folding

To actually see our transits, it can be very helpful to do some period-folding:

period = 2*np.pi / ffreq
print(f"{period=}")

norm_phase = (times % period) / period

plt.plot(norm_phase, signal, '.')
plt.show()
period=3573.496301061434

Direct Convolution

sorted_ids = np.argsort(norm_phase)
sorted_times = times[sorted_ids]
sorted_phase = norm_phase[sorted_ids]
sorted_signal = signal[sorted_ids]

SIZE = 100
window = np.ones(SIZE) / SIZE
out = np.convolve(sorted_signal, window, mode='valid')

plt.plot(sorted_phase, sorted_signal, '.')
plt.plot(sorted_phase[SIZE//2:-SIZE//2+1], out)
plt.show()

Pandas

df = pd.DataFrame({"time": times, "signal": signal, "phase": norm_phase})

df.sort_values('phase', inplace=True)

df['rolling'] = df.signal.rolling(SIZE).mean()

plt.plot(df['phase'], df['signal'], '.')
plt.plot(df['phase'], df['rolling'])
plt.show()

Rebinning Phase

STEP = 0.01
bin_labels = pd.cut(df.phase, bins=np.arange(0,1,STEP), labels=False)
df['bin_phase'] = bin_labels*STEP

plt.plot(df['phase'], df['signal'], '.')
plt.plot(df.groupby('bin_phase', dropna=False).signal.mean(), lw=2)
plt.show()

Rebinning Time

df2 = pd.DataFrame({'time': times, 'signal': signal})

STEP = 5
bins = np.arange(0, 20000+STEP, STEP)
bin_labels = pd.cut(df2.time, bins=bins, labels=False)
df2['time_bins'] = bin_labels * STEP
df2.head()

aligned = df2.groupby('time_bins', dropna=False).signal.mean()
aligned = aligned.to_frame().reindex(labels=bins).interpolate(method='linear').dropna()
aligned
signal
time_bins
5 0.983133
10 0.999150
15 1.015166
20 1.012510
25 0.986566
... ...
19980 1.029150
19985 1.009560
19990 0.989971
19995 0.989971
20000 0.989971

4000 rows × 1 columns

plt.plot(aligned, '.')
plt.show()

ft = np.fft.rfft(aligned.signal-aligned.signal.mean())
freqs = np.fft.rfftfreq(aligned.size, STEP)
power = 1 / len(aligned) * abs(ft)**2

plt.plot(freqs, power)
plt.xlim(0,0.005)
plt.show()

peaks_ids, peak_info = find_peaks(power, height=0.004)
if len(peaks_ids) > 0:
    print(f"period = {1 / freqs[peaks_ids[0]]}")
period = 6666.666666666666