#!/usr/bin/env python3
#
# create_metar_dat.py
#
# Script to create metar.dat.gz by checking tgftp.nws.noaa.gov for stations
# that have reported recently.
#
# Usage: create_metar_dat.py [OUTPUT_FILENAME]
#
# If OUTPUT_FILENAME is omitted, creates metar.dat.gz in working directory.
#
# Copyright (c) 2016 Richard Senior
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.

import datetime
import gzip
import os
import re
import sys
import urllib.request

import dateutil.parser

# Output path: first command-line argument, or metar.dat.gz by default.
filename = "metar.dat.gz" if len(sys.argv) <= 1 else sys.argv[1]
# Stations whose listing entry is older than this many days are skipped.
max_age_days = 7
# Fewer active stations than this is treated as a parsing failure.
min_stations = 5000
url = "http://tgftp.nws.noaa.gov/data/observations/metar/stations/"
now = datetime.datetime.now()
script = os.path.basename(__file__)


class ParseException(Exception):
    """Raised when the station listing does not look as expected."""


def stations():
    """Yield every raw line (bytes) of the page at `url` containing ".TXT".

    The whole response is read and the connection closed before the first
    line is yielded, so the HTTP response is never left open, however far
    the caller iterates.
    """
    with urllib.request.urlopen(url) as response:
        lines = response.readlines()
    for line in lines:
        if b".TXT" in line:
            yield line


def active_stations():
    """Yield the ICAO code of each station modified within `max_age_days`.

    Raises:
        ParseException: if a value extracted as an ICAO code does not match
            the pattern of one upper-case letter followed by three
            upper-case letters or digits.
    """
    cutoff = now - datetime.timedelta(days=max_age_days)
    icao_pattern = re.compile(r"[A-Z][A-Z0-9]{3}")
    for station in stations():
        tokens = station.split()
        # NOTE(review): the fixed slice offsets below assume a specific
        # layout of the listing lines served at `url` -- re-verify them if
        # the server's page format ever changes.
        last_modified = tokens[2][14:25].decode()
        if dateutil.parser.parse(last_modified) > cutoff:
            icao = tokens[1][16:20].decode()
            # Sanity check on parsed ICAO code.
            if not icao_pattern.match(icao):
                raise ParseException("Dubious ICAO code: " + icao)
            yield icao


# Run through the active stations before opening the file so that a file
# is not created (or overwritten) if an exception is thrown.
active = [icao + "\n" for icao in active_stations()]

# Sanity check on the number of stations.
if len(active) < min_stations:
    raise ParseException(
        "Expected at least " + str(min_stations)
        + " stations, found only " + str(len(active))
    )

with gzip.open(filename, "wt") as f:
    print("# List of airports known to have metar data available", file=f)
    print("# Generated by", script, "on", now.strftime("%Y-%m-%d"), file=f)
    print("#", url, file=f)
    f.writelines(active)

print(script + ": Wrote", len(active), "stations to", filename)