# import libraries for data manipulation

import pandas as pd

# Load the VIIRS sample CSV published by FIRMS into the DataFrame df

df = pd.read_csv('https://firms.modaps.eosdis.nasa.gov/content/notebooks/sample_viirs_snpp_071223.csv')

# Report the table size. df.shape is already the (rows, columns) pair the
# format string needs: rows are records, columns are values per record.

print('FIRMS sample fire data contains %i rows and %i columns' % df.shape)
df.shape

FIRMS sample fire data contains 74605 rows and 14 columns

(74605, 14)


# Display information about our data: info() prints the index range, each
# column's non-null count and dtype, and the memory usage

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74605 entries, 0 to 74604
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   latitude    74605 non-null  float64
 1   longitude   74605 non-null  float64
 2   bright_ti4  74605 non-null  float64
 3   scan        74605 non-null  float64
 4   track       74605 non-null  float64
 5   acq_date    74605 non-null  object 
 6   acq_time    74605 non-null  int64  
 7   satellite   74605 non-null  object 
 8   instrument  74605 non-null  object 
 9   confidence  74605 non-null  object 
 10  version     74605 non-null  object 
 11  bright_ti5  74605 non-null  float64
 12  frp         74605 non-null  float64
 13  daynight    74605 non-null  object 
dtypes: float64(7), int64(1), object(6)
memory usage: 8.0+ MB


# Preview the first 5 records of the dataset

df.head(n=5)


# Preview the last 5 records of the dataset

df.tail(n=5)


# List the distinct values stored in the 'version' column

print('Checking for unique version values:')
df.loc[:, 'version'].unique()

Checking for unique version values:

array(['2.0NRT', '2.0URT'], dtype=object)


# Count the records of each version: a boolean mask sums to the number of
# rows where it is True

print('Version 2.0NRT has %i records and version 2.0URT has %i records' % ((df['version'] == '2.0NRT').sum(), (df['version'] == '2.0URT').sum()))

Version 2.0NRT has 69507 records and version 2.0URT has 5098 records


import pandas as pd
df = pd.read_csv('https://firms.modaps.eosdis.nasa.gov/content/notebooks/sample_viirs_snpp_071223.csv')

# Canada's approximate bounding box, taken from 'Regional Coordinates'
# Canada coordinates: West South, East North as -150 40, -49 79

# between() includes both end points, so detections on the box edges are kept;
# copy() makes df_canada a separate dataset holding the Canada data only
df_canada = df[df['longitude'].between(-150, -49) & df['latitude'].between(40, 79)].copy()
print('Canada subset contains %i records.' % len(df_canada))

Canada subset contains 14045 records.


# Build a second subset, Australia and New Zealand, for a later example
# Australia and New Zealand coordinates: West South, East North as 110 -55, 180 -10

# This time the extent values are kept in a list: [west, south, east, north]
extent = [110, -55, 180, -10]
df_aus_nz = df[df['longitude'].between(extent[0], extent[2]) & df['latitude'].between(extent[1], extent[3])].copy()
print('Australia and New Zealand subset contains %i records.' % len(df_aus_nz))

Australia and New Zealand subset contains 2999 records.


# Subset the data to detections with confidence normal 'n' or high 'h'
# and fire radiative power (frp) greater than or equal to 5

# Day time ('D') detections. The filter is inclusive (frp >= 5), and the
# printed message states the same bound.
df_custom_day = df[
    ((df['confidence'] == 'n') | (df['confidence'] == 'h'))
    & (df['frp'] >= 5)
    & (df['daynight'] == 'D')
]
print('Day time detection with normal and high confidence and frp >= 5 contains %i records' % len(df_custom_day))

# The same filter, but for night time ('N') detections
df_custom_night = df[
    ((df['confidence'] == 'n') | (df['confidence'] == 'h'))
    & (df['frp'] >= 5)
    & (df['daynight'] == 'N')
]
print('Night time detections with normal and high confidence and frp >= 5 contains %i records' % len(df_custom_night))

Day time detection with normal and high confidence and frp >= 5 contains 35504 records
Night time detections with normal and high confidence and frp >= 5 contains 2683 records


# Reload the data and rebuild both regional subsets in case earlier steps were missed
import pandas as pd
df = pd.read_csv('https://firms.modaps.eosdis.nasa.gov/content/notebooks/sample_viirs_snpp_071223.csv')
# Canada: longitude -150 to -49, latitude 40 to 79 (end points included)
df_canada = df[df['longitude'].between(-150, -49) & df['latitude'].between(40, 79)].copy()
# Australia and New Zealand: longitude 110 to 180, latitude -55 to -10 (end points included)
df_aus_nz = df[df['longitude'].between(110, 180) & df['latitude'].between(-55, -10)].copy()


# Build an acq_datetime column by combining acq_date and acq_time.
# acq_time is an integer column (e.g. 633), so it is converted to text and
# left-padded with zeros to the four-digit HHMM form the format string expects.

df_canada['acq_datetime'] = pd.to_datetime(
    df_canada['acq_date'] + ' ' + df_canada['acq_time'].astype(str).str.zfill(4),
    format='%Y-%m-%d %H%M',
)

# Look at 5 random records to confirm the conversion.
# note: seconds are always 0, because the dataset carries no seconds information

print('Canada sample datetime info:')
df_canada['acq_datetime'].sample(n=5)

Canada sample datetime info:

72664   2023-07-12 19:49:00
74438   2023-07-12 19:50:00
19686   2023-07-12 08:16:00
25776   2023-07-12 10:00:00
74238   2023-07-12 19:50:00
Name: acq_datetime, dtype: datetime64[ns]


# Report the earliest and latest acquisition datetime available for Canada
# (%s already applies str() to each value)

print('Canada datetime value range: %s to %s' % (df_canada['acq_datetime'].min(), df_canada['acq_datetime'].max()))

Canada datetime value range: 2023-07-12 04:53:00 to 2023-07-12 19:50:00


# pytz supplies the time zone names listed for each country code
import pytz

print('Canada TimeZones')
for tz_name in pytz.country_timezones['CA']:
    print(tz_name)

Canada TimeZones
America/St_Johns
America/Halifax
America/Glace_Bay
America/Moncton
America/Goose_Bay
America/Blanc-Sablon
America/Toronto
America/Iqaluit
America/Atikokan
America/Winnipeg
America/Resolute
America/Rankin_Inlet
America/Regina
America/Swift_Current
America/Edmonton
America/Cambridge_Bay
America/Yellowknife
America/Inuvik
America/Creston
America/Dawson_Creek
America/Fort_Nelson
America/Whitehorse
America/Dawson
America/Vancouver


# Compare the minimum and maximum datetime of the Canada subset as seen from
# 3 different Canada time zones, next to the original GMT values.
# We will use:
# Original GMT
# St_Johns (GMT-2:30)
# Toronto (GMT-4:00)
# Vancouver (GMT-7:00)

print('Canada GMT timezone datetime value range: %s to %s' % (df_canada['acq_datetime'].min(), df_canada['acq_datetime'].max()))

# acq_datetime carries no time zone, so it is localized as GMT first and then
# converted; each converted series is built once and used for both min and max
for range_message, zone_name in (
    ('Canada St Johns timezone datetime value range: %s to %s', 'America/St_Johns'),
    ('Canada Toronto timezone datetime value range: %s to %s', 'America/Toronto'),
    ('Canada Vancouver timezone datetime value range: %s to %s', 'America/Vancouver'),
):
    local_times = df_canada['acq_datetime'].dt.tz_localize('GMT').dt.tz_convert(zone_name)
    print(range_message % (local_times.min(), local_times.max()))

Canada GMT timezone datetime value range: 2023-07-12 04:53:00 to 2023-07-12 19:50:00
Canada St Johns timezone datetime value range: 2023-07-12 02:23:00-02:30 to 2023-07-12 17:20:00-02:30
Canada Toronto timezone datetime value range: 2023-07-12 00:53:00-04:00 to 2023-07-12 15:50:00-04:00
Canada Vancouver timezone datetime value range: 2023-07-11 21:53:00-07:00 to 2023-07-12 12:50:00-07:00


# Extent as [west, south, east, north]; between() includes both end points
extent = [110, -55, 180, -10]
df_aus_nz = df[df['longitude'].between(extent[0], extent[2]) & df['latitude'].between(extent[1], extent[3])].copy()

# Repeat the datetime conversion for Australia and New Zealand:
# zero-pad acq_time to HHMM and parse it together with acq_date

df_aus_nz['acq_datetime'] = pd.to_datetime(
    df_aus_nz['acq_date'] + ' ' + df_aus_nz['acq_time'].astype(str).str.zfill(4),
    format='%Y-%m-%d %H%M',
)

print('Australia and New Zealand sample datetime info:')
df_aus_nz['acq_datetime'].sample(n=5)

Australia and New Zealand sample datetime info:

12699   2023-07-12 04:17:00
11125   2023-07-12 04:17:00
12673   2023-07-12 04:17:00
10431   2023-07-12 04:14:00
10470   2023-07-12 04:14:00
Name: acq_datetime, dtype: datetime64[ns]


# List the time zones of Australia and New Zealand, one heading per country

for heading, country_code in (('Australia TimeZones', 'AU'), ('New Zealand TimeZones', 'NZ')):
    print(heading)
    for tz_name in pytz.country_timezones[country_code]:
        print(tz_name)

Australia TimeZones
Australia/Lord_Howe
Antarctica/Macquarie
Australia/Hobart
Australia/Melbourne
Australia/Sydney
Australia/Broken_Hill
Australia/Brisbane
Australia/Lindeman
Australia/Adelaide
Australia/Darwin
Australia/Perth
Australia/Eucla
New Zealand TimeZones
Pacific/Auckland
Pacific/Chatham


# Finally, compare the GMT range with the Sydney and Auckland local-time ranges

print('Australia and New Zealand GMT timezone datetime value range: %s to %s' % (df_aus_nz['acq_datetime'].min(), df_aus_nz['acq_datetime'].max()))

# acq_datetime carries no time zone, so it is localized as GMT before converting
sydney_times = df_aus_nz['acq_datetime'].dt.tz_localize('GMT').dt.tz_convert('Australia/Sydney')
print('Australia Sydney timezone datetime value range: %s to %s' % (sydney_times.min(), sydney_times.max()))

# This range is converted to Pacific/Auckland, so it is labelled as New Zealand
auckland_times = df_aus_nz['acq_datetime'].dt.tz_localize('GMT').dt.tz_convert('Pacific/Auckland')
print('New Zealand Auckland timezone datetime value range: %s to %s' % (auckland_times.min(), auckland_times.max()))

Australia and New Zealand GMT timezone datetime value range: 2023-07-12 02:26:00 to 2023-07-12 15:31:00
Australia Sydney timezone datetime value range: 2023-07-12 12:26:00+10:00 to 2023-07-13 01:31:00+10:00
New Zealand Auckland timezone datetime value range: 2023-07-12 14:26:00+12:00 to 2023-07-13 03:31:00+12:00


# List the time zones pytz has for the United States
for tz_name in pytz.country_timezones['US']:
    print(tz_name)

America/New_York
America/Detroit
America/Kentucky/Louisville
America/Kentucky/Monticello
America/Indiana/Indianapolis
America/Indiana/Vincennes
America/Indiana/Winamac
America/Indiana/Marengo
America/Indiana/Petersburg
America/Indiana/Vevay
America/Chicago
America/Indiana/Tell_City
America/Indiana/Knox
America/Menominee
America/North_Dakota/Center
America/North_Dakota/New_Salem
America/North_Dakota/Beulah
America/Denver
America/Boise
America/Phoenix
America/Los_Angeles
America/Anchorage
America/Juneau
America/Sitka
America/Metlakatla
America/Yakutat
America/Nome
America/Adak
Pacific/Honolulu


# Summarizing the tutorial:

import pandas as pd
df = pd.read_csv('https://firms.modaps.eosdis.nasa.gov/content/notebooks/sample_viirs_snpp_071223.csv')

# create Canada subset (built once; copy() keeps it independent of df)
df_canada = df[(df['longitude'] >= -150) & (df['latitude'] >= 40) & (df['longitude'] <= -49) & (df['latitude'] <= 79)].copy()

# len() gives the number of rows directly. count()[0] would count only the
# non-null values of the first column and relies on a positional lookup in a
# label-indexed Series, which pandas has deprecated.
print('Canada subset contains %i records.' % len(df_canada))

# create datetime column: acq_time is zero-padded to HHMM and parsed with acq_date
df_canada['acq_datetime'] = pd.to_datetime(df_canada['acq_date'] + ' ' + df_canada['acq_time'].astype(str).str.zfill(4), format='%Y-%m-%d %H%M')

# localize the naive datetimes as GMT once, then view them in Toronto time
toronto_times = df_canada['acq_datetime'].dt.tz_localize('GMT').dt.tz_convert('America/Toronto')
print('Canada/Toronto timezone datetime value range: %s to %s \n\n' % (toronto_times.min(), toronto_times.max()))

df_canada.sample(5)

Canada subset contains 14045 records.
Canada/Toronto timezone datetime value range: 2023-07-12 00:53:00-04:00 to 2023-07-12 15:50:00-04:00


import pandas as pd

df = pd.read_csv('https://firms.modaps.eosdis.nasa.gov/content/notebooks/sample_viirs_snpp_071223.csv')

# NOTE: this is an exercise template. Every ? below is a placeholder to be
# replaced by the reader; the code is not valid Python until they are filled in.


# Question 1: using Regional Coordinates, how many fire detections are there for USA (Conterminous) and Hawaii?

extent = [?,?,?,?] # get USA (Conterminous) and Hawaii extent, ordered west, south, east, north (see the filter below)
df_usa = df[(df['longitude'] >= extent[0]) & (df['latitude'] >= extent[1]) & (df['longitude'] <= extent[2]) & (df['latitude'] <= extent[3])].copy()

total_fires = len(df_usa) # provide count of records for df_usa

print ('Answer 1: There are %i detections for USA (Conterminous) and Hawaii' % total_fires)


# Question 2: applying 'Pacific/Honolulu' timezone, what is the minimum and maximum date range of USA (Conterminous) and Hawaii fire detections?

df_usa['acq_datetime'] = ? # set this value
min_date = ? # get minimum acq_datetime from df_usa, applying 'Pacific/Honolulu' time zone
max_date = ? # get maximum acq_datetime from df_usa, applying 'Pacific/Honolulu' time zone

print ('Answer 2: For USA (Conterminous) and Hawaii, using Honolulu timezone, minimum date is %s and maximum date is %s' % (str(min_date), str(max_date)) )


# Question 3: How many USA (Conterminous) and Hawaii records are ultra real time? (Hint: use 'version' column and set it to '2.0URT')

urt_records = len(df_usa[?]) # set the query
print ('There are %i ultra real time records.' % urt_records)


import pandas as pd
import pytz

df = pd.read_csv('https://firms.modaps.eosdis.nasa.gov/content/notebooks/sample_viirs_snpp_071223.csv')

# Answer 1: USA (Conterminous) and Hawaii extent as [west, south, east, north];
# between() includes both end points
extent = [-160.5, 17.5, -63.8, 50]
df_usa = df[df['longitude'].between(extent[0], extent[2]) & df['latitude'].between(extent[1], extent[3])].copy()

total_fires = df_usa.shape[0]  # number of records in df_usa

print('Answer 1: There are %i detections for USA (Conterminous) and Hawaii' % total_fires)

# Answer 2: build acq_datetime (acq_time zero-padded to HHMM), localize it as
# GMT once, then read the minimum and maximum in Honolulu time
df_usa['acq_datetime'] = pd.to_datetime(
    df_usa['acq_date'] + ' ' + df_usa['acq_time'].astype(str).str.zfill(4),
    format='%Y-%m-%d %H%M',
)
honolulu_times = df_usa['acq_datetime'].dt.tz_localize('GMT').dt.tz_convert('Pacific/Honolulu')
min_date = honolulu_times.min()
max_date = honolulu_times.max()

print('Answer 2: For USA (Conterminous) and Hawaii, using Honolulu timezone, minimum date is %s and maximum date is %s' % (min_date, max_date))

# Answer 3: ultra real time records are the ones whose version is '2.0URT'
urt_records = int((df_usa['version'] == '2.0URT').sum())
print('There are %i ultra real time records.' % urt_records)

Answer 1: There are 900 detections for USA (Conterminous) and Hawaii
Answer 2: For USA (Conterminous) and Hawaii, using Honolulu timezone, minimum date is 2023-07-11 20:37:00-10:00 and maximum date is 2023-07-12 09:46:00-10:00
There are 324 ultra real time records.

	latitude	longitude	bright_ti4	scan	track	acq_date	acq_time	satellite	instrument	confidence	version	bright_ti5	frp	daynight
0	0.05836	29.59085	295.64	0.38	0.59	2023-07-12	3	N	VIIRS	n	2.0NRT	275.15	0.83	N
1	0.48765	31.50760	296.73	0.51	0.66	2023-07-12	3	N	VIIRS	n	2.0NRT	275.15	0.56	N
2	2.15227	13.94524	305.26	0.51	0.49	2023-07-12	3	N	VIIRS	n	2.0NRT	287.94	1.08	N
3	2.15681	13.94618	319.05	0.51	0.49	2023-07-12	3	N	VIIRS	n	2.0NRT	288.77	1.81	N
4	2.15754	13.94131	301.13	0.51	0.50	2023-07-12	3	N	VIIRS	n	2.0NRT	288.17	1.81	N

	latitude	longitude	bright_ti4	scan	track	acq_date	acq_time	satellite	instrument	confidence	version	bright_ti5	frp	daynight
74600	61.42408	-110.40578	350.48	0.4	0.4	2023-07-12	1950	N	VIIRS	n	2.0URT	309.39	16.01	D
74601	61.42510	-110.39867	336.03	0.4	0.4	2023-07-12	1950	N	VIIRS	l	2.0URT	308.08	32.98	D
74602	61.42733	-110.40780	328.53	0.4	0.4	2023-07-12	1950	N	VIIRS	n	2.0URT	298.15	16.01	D
74603	61.42834	-110.40069	338.45	0.4	0.4	2023-07-12	1950	N	VIIRS	n	2.0URT	302.81	32.98	D
74604	61.42936	-110.39356	339.52	0.4	0.4	2023-07-12	1950	N	VIIRS	n	2.0URT	306.58	32.98	D

	latitude	longitude	bright_ti4	scan	track	acq_date	acq_time	satellite	instrument	confidence	version	bright_ti5	frp	daynight	acq_datetime
38386	61.98559	-134.58316	299.32	0.39	0.36	2023-07-12	1138	N	VIIRS	n	2.0NRT	274.70	1.57	N	2023-07-12 11:38:00
24289	61.97328	-134.62656	297.51	0.50	0.66	2023-07-12	958	N	VIIRS	n	2.0NRT	277.51	4.05	N	2023-07-12 09:58:00
17784	58.17107	-67.48384	304.70	0.43	0.38	2023-07-12	633	N	VIIRS	n	2.0NRT	287.49	2.35	N	2023-07-12 06:33:00
19342	64.22210	-113.82372	355.02	0.68	0.74	2023-07-12	813	N	VIIRS	n	2.0NRT	284.98	9.25	N	2023-07-12 08:13:00
17939	53.12735	-77.04724	296.30	0.51	0.49	2023-07-12	635	N	VIIRS	n	2.0NRT	279.85	1.76	N	2023-07-12 06:35:00

NASA FIRMS - Data Ingest and Manipulation in Python

Data Ingest

Subsetting

Datetime Conversion and Timezones

Summary

Exercise

Solution

Errors