Jed Rembold
June 18, 2025
| Setting | Description | Type |
|---|---|---|
retries |
How many times to retry if something fails | int |
retry_delay |
How long to wait until trying again | duration |
retry_exponential_backoff |
Should each retry wait exponentially longer? | bool |
max_retry_delay |
Max delay between retries (caps the exponential backoff) | duration |
() after
the @task decorator
@task(
retries = 3,
retry_delay = pendulum.duration(minutes=1),
retry_exponential_backoff = True
)
def my_tricky_task():
|||Task logic and code|||
from pendulum import duration, datetime
# Retry settings collected in one dict so they can be handed to a DAG
# through its `default_args` parameter.
defaults = {
    "retries": 3,  # how many times to retry if something fails
    "retry_delay": duration(minutes=1),  # how long to wait until trying again
    "retry_exponential_backoff": True,  # each retry waits exponentially longer
    "max_retry_delay": duration(hours=1),  # caps the exponential backoff
}
@dag(
start_date = datetime(2025, 6, 18),
schedule = '0 0 * * *',
default_args = defaults
)
def my_great_dag():
|||All my tasks which will inherit the defaults|||
execution_timeout=|||some duration|||
parameteron_success_callback)on_failure_callback)on_retry_callback)on_success_callback)on_failure_callback)context passed into it as the only
parameterutils.py file that would enable Discord
reporting to your private group channel on Discord
from utils import discord_alert
@dag(
...,
on_failure_callback = discord_alert
)
def my_broken_dag():
|||All the tasks|||
airflow.cfg
(right)send_email in custom
callbackemail,
email_on_failure,
email_on_retry settings in default args
[email]
email_backend = airflow.utils.email.send_email_smtp
[smtp]
smtp_host = smtp.gmail.com
smtp_starttls = True
smtp_ssl = False
smtp_user = your_email@gmail.com
smtp_password = your_app_password # NOT your Gmail password!
smtp_port = 587
smtp_mail_from = your_email@gmail.com
SQLCheckOperator
SQLValueCheckOperator
SQLThresholdCheckOperator
SQLIntervalCheckOperator
NULL values in
important columns
http_task = HttpOperator(
task_id="get_example_data",
method="GET",
endpoint="api/data",
http_conn_id="my_api_conn",
headers={
"Accept": "application/json"
},
response_filter= (
lambda response: response.json()
),
log_response=True
)
There is an operator just called
HttpOperator that you can utilize
A bit cumbersome to get the acquired data into another task, as you must use the XCom method
context['ti'].xcom_pull(
task_ids="get_example_data"
)
Probably only reasonable for very simple API calls (especially calls that don’t return anything)
When you want more control (which will be most of the time), you
will probably want to use Python’s requests
library
import requests
def fetch_users():
url = "https://example.com/api/users"
resp = requests.get(url)
if resp.status_code != 200:
raise Exception(
f"Request failed: {resp.status_code} - {resp.text}"
)
return resp.json()Some APIs may require special header information or you may want to provide extra query parameters
The requests library makes this very straightforward
headers = {
"Authorization": f"Bearer {API_KEY}"
}
params = {
"page": 2,
"sort": "desc"
}
resp = requests.get(|||url|||, params=params, headers=headers)
Requests will take care of handling all the parameter syntax for you!
Parse it manually, looping over keys/entries
Use Pandas:
pd.DataFrame.from_dict(|||your json|||)
Write to a file, then use DuckDB
with open('temp.json', 'w') as fh:
json.dump(|||your json|||, fh)
db = duckdb.connect()
out = db.sql("SELECT * FROM 'temp.json'")
out.show()


def get_all_pages(api_url, page_size):
    """Collect the records from every page of a paginated API endpoint.

    Each request sends `page` and `page_size` as query parameters. The JSON
    payload is read for a "data" list (the records of that page) and a
    "total_pages" count, which decides when to stop.

    Args:
        api_url: URL of the paginated endpoint.
        page_size: Value sent as the `page_size` query parameter.

    Returns:
        A single list holding the "data" entries of every page, in order.

    Raises:
        RuntimeError: if a request fails or returns an HTTP error status.
            The underlying requests exception is chained as the cause.
    """
    all_results = []
    current_page = 1
    total_pages = None  # unknown until the first response has been read
    while total_pages is None or current_page <= total_pages:
        params = {
            'page': current_page,
            'page_size': page_size
        }
        try:
            resp = requests.get(api_url, params=params)
            resp.raise_for_status()
        # Only request failures are translated; a blanket `except Exception`
        # would also disguise programming errors as API failures.
        except requests.RequestException as err:
            raise RuntimeError(
                f"API request failed on page {current_page}"
            ) from err
        payload = resp.json()
        data = payload.get("data", [])
        total_pages = payload.get("total_pages")
        all_results.extend(data)
        # Without a "total_pages" value the loop condition can never become
        # false, so an empty page is taken as the end of the data.
        if total_pages is None and not data:
            break
        current_page += 1
    return all_results
try/except block
in a for loop
tenacity
library
from tenacity import retry, stop_after_attempt
from tenacity import wait_exponential, retry_if_exception
@retry(
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=2, min=1, max=60),
    # retry_if_exception expects a predicate that receives the raised
    # exception. Passing the Exception class itself only worked because the
    # instance it constructs is truthy. Retrying is limited to errors raised
    # by requests, so unrelated bugs surface immediately.
    retry=retry_if_exception(
        lambda exc: isinstance(exc, requests.RequestException)
    ),
)
def fetch_page(url, params):
    """GET one page from `url` and print its decoded JSON body.

    The call is retried on request errors, for at most 5 attempts, with an
    exponentially growing wait between attempts (multiplier 2, clamped
    between 1 and 60).

    Args:
        url: Endpoint to request.
        params: Query parameters passed to `requests.get`.
    """
    resp = requests.get(url, params=params)
    # Turn HTTP error statuses into exceptions so they trigger a retry.
    resp.raise_for_status()
    print(resp.json())
sources.advde:8090/api/docs gets you all
the documentation for the API
sources.advde:8090/api/|||day|||/fares
gets you the subscribed fares on a given day