...
2) Program description
Code Block | ||
---|---|---|
| ||
"""
Created on 22 Oct 2019

# Copyright 2005-2018 ECMWF.
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.
# In applying this licence, ECMWF does not waive the privileges and immunities
# granted to it by virtue of its status as an intergovernmental organisation
# nor does it submit to any jurisdiction

This is a test program to encode Wigos Synop

requires
    1) ecCodes version 2.14.1 or above
       (available at https://confluence.ecmwf.int/display/ECC/Releases)
    2) python3.6.8-01

To run the program
    ./addWigosProg.py -i <input bufr> -m <mode [web|json]> -l <logFile> -o <output BUFR file>

Uses BUFR version 4 template and adds the WIGOS Identifier 301150
REQUIRES TablesVersionNumber above 28

Author : Roberto Ribas Garcia ECMWF 28/10/2019

Modifications
    Added copy_header function to keep the header keys from the input message 04/11/2019
    performance improvement (uses skipExtraKeyAttributes) and codes_clone 04/11/2019
    changes for SYNOP and TEMP messages 05/11/2019
"""
import argparse
import json
import logging
import os
import re

import numpy as np
import pandas as pd
import requests
from eccodes import (
    CODES_MISSING_LONG,
    codes_bufr_copy_data,
    codes_bufr_new_from_file,
    codes_clone,
    codes_count_in_file,
    codes_get,
    codes_get_api_version,
    codes_get_array,
    codes_is_defined,
    codes_release,
    codes_set,
    codes_set_array,
    codes_write,
    )

# BUFR sequence descriptor carrying the WIGOS identifier.
WIGOS_SEQUENCE = 301150

# Lowest master table version the output message is allowed to declare.
MIN_MASTER_TABLES_VERSION = 28

# Only the surface WIGOS identifiers 0-20XXX-0-YYYYY are used.
WIGOS_SURFACE_ID = re.compile(r"0-20\d{3}-0-\d{5}")

# Columns kept in the station dataframe.
WIGOS_COLUMNS = [
    "longitude",
    "latitude",
    "name",
    "wigosStationIdentifiers",
    "wigosIdentifierSeries",
    "wigosIssuerOfIdentifier",
    "wigosIssueNumber",
    "wigosLocalIdentifierCharacter",
    "oldID",
]

# Header keys copied verbatim from the input to the output message.
HEADER_KEYS = (
    "bufrHeaderCentre",
    "bufrHeaderSubCentre",
    "updateSequenceNumber",
    "dataCategory",
    "internationalDataSubCategory",
    "dataSubCategory",
    "typicalYear",
    "typicalMonth",
    "typicalDay",
    "typicalHour",
    "typicalMinute",
    "typicalSecond",
)


def read_cmd_line():
    """Parse the command line and return the argparse namespace."""
    p = argparse.ArgumentParser()
    # input/output are mandatory: without them main() cannot open any file
    p.add_argument("-i", "--input", required=True, help="input bufr file")
    p.add_argument("-o", "--output", required=True,
                   help="output bufr file with wigos")
    p.add_argument("-m", "--mode", choices=["web", "json"],
                   help=" wigos source [ json file or web ]")
    p.add_argument("-l", "--logfile", help="log file ")
    return p.parse_args()


def read_oscar_json(jsonFile):
    """Load and return the OSCAR station list stored in jsonFile."""
    with open(jsonFile, "r") as f:
        return json.load(f)


def read_oscar_web(oscarURL="https://oscar.wmo.int/surface/rest/api/search/station?",
                   timeout=120):
    """Download the OSCAR station list and return the decoded JSON.

    timeout is the number of seconds passed to requests.get; an HTTP
    error status raises requests.HTTPError instead of handing an error
    page to the JSON decoder.
    """
    r = requests.get(oscarURL, timeout=timeout)
    r.raise_for_status()
    return r.json()


def parse_json_into_dataframe(jtext):
    """Build the WIGOS station dataframe from the OSCAR JSON.

    jtext is the list of station dictionaries. Stations without the
    "wigosStationIdentifiers" key are ignored. For every primary
    identifier matching the surface pattern 0-20XXX-0-YYYYY the station
    dictionary is extended (in place) with the four identifier parts and
    with "oldID", the last five characters of the local identifier.

    Returns a DataFrame with the WIGOS_COLUMNS columns; it is empty when
    no station matches.
    """
    fwigosStations = []
    for d in jtext:
        if "wigosStationIdentifiers" not in d:
            continue
        for e in d["wigosStationIdentifiers"]:
            if not e["primary"]:
                continue
            wigosId = e["wigosStationIdentifier"]
            if not WIGOS_SURFACE_ID.match(wigosId):
                continue
            wigosParts = wigosId.split("-")
            d["wigosIdentifierSeries"] = wigosParts[0]
            d["wigosIssuerOfIdentifier"] = wigosParts[1]
            d["wigosIssueNumber"] = wigosParts[2]
            d["wigosLocalIdentifierCharacter"] = wigosParts[3]
            d["oldID"] = wigosParts[3][-5:]
            fwigosStations.append(d)
    # passing columns= keeps the expected layout even with no stations
    return pd.DataFrame(fwigosStations, columns=WIGOS_COLUMNS)


def get_ident(bid):
    """Return the station ident(s) of the message pointed to by bid.

    The ident combines the blockNumber and stationNumber keys of the
    input BUFR message as BBSSS. A single valued message gives a string;
    a multi valued one gives a list of strings without the missing
    values (only single valued idents are considered further). None is
    returned when the keys are not defined or their sizes cannot be
    paired.
    """
    if not (codes_is_defined(bid, "blockNumber")
            and codes_is_defined(bid, "stationNumber")):
        return None

    blockNumber = codes_get_array(bid, "blockNumber")
    stationNumber = codes_get_array(bid, "stationNumber")

    if len(stationNumber) == 1:
        if len(blockNumber) != 1:
            return None
        # index explicitly: int() on a whole array is deprecated in numpy
        return "{0:02d}{1:03d}".format(int(blockNumber[0]),
                                       int(stationNumber[0]))

    if len(blockNumber) == 1:
        # one block shared by every station of the message
        blockNumber = np.repeat(blockNumber, len(stationNumber))
    return [
        "{0:02d}{1:03d}".format(b, s)
        for b, s in zip(blockNumber, stationNumber)
        if b != CODES_MISSING_LONG and s != CODES_MISSING_LONG
    ]


def copy_header(bid, obid):
    """Copy the header keys from bid to obid.

    This avoids using the default values on the output message.
    internationalDataSubCategory is copied only when it is defined in
    the input message.
    """
    for key in HEADER_KEYS:
        if (key == "internationalDataSubCategory"
                and not codes_is_defined(bid, key)):
            continue
        codes_set(obid, key, codes_get(bid, key))


def _get_array_if_defined(bid, key):
    """Return the array stored under key in bid, or None if undefined."""
    if codes_is_defined(bid, key):
        return codes_get_array(bid, key)
    return None


def _first_int(odf, column):
    """Return the first value of the dataframe column as an int."""
    return int(odf[column].values[0])


def add_wigos_info(ident, bid, wdf, obid):
    """Add the wigos information to the message ident pointed by bid.

    wdf is the whole wigos dataframe and obid is the output bid.
    Only uncompressed messages with 1 subset are treated (for future add
    treatment of compressed messages with more than 1 subset); any other
    message is logged and obid is left untouched. Returns obid.
    """
    shortDelayed = _get_array_if_defined(
        bid, "shortDelayedDescriptorReplicationFactor")
    delayedDesc = _get_array_if_defined(
        bid, "delayedDescriptorReplicationFactor")
    extDelayedDesc = _get_array_if_defined(
        bid, "extendedDelayedDescriptorReplicationFactor")

    nsubsets = codes_get(bid, "numberOfSubsets")
    compressed = codes_get(bid, "compressedData")
    # the WIGOS sequence needs at least this master table version
    masterTablesVersionNumber = max(
        codes_get(bid, "masterTablesVersionNumber"),
        MIN_MASTER_TABLES_VERSION)

    # the WIGOS sequence goes in front of the original descriptors
    outUD = [WIGOS_SEQUENCE]
    outUD.extend(codes_get_array(bid, "unexpandedDescriptors"))

    if not (compressed == 0 and nsubsets == 1):
        logging.info(" skipping compressed message id {0} with {1} subsets ".format(ident, nsubsets))
        return obid

    # IMPORTANT, takes into account delayed replications (all possible
    # cases) to accommodate SYNOP + TEMP messages
    if shortDelayed is not None:
        codes_set_array(obid, "inputShortDelayedDescriptorReplicationFactor",
                        shortDelayed)
    if delayedDesc is not None:
        codes_set_array(obid, "inputDelayedDescriptorReplicationFactor",
                        delayedDesc)
    if extDelayedDesc is not None:
        codes_set_array(obid, "inputExtendedDelayedDescriptorReplicationFactor",
                        extDelayedDesc)

    copy_header(bid, obid)
    codes_set(obid, "masterTablesVersionNumber", masterTablesVersionNumber)
    codes_set(obid, "numberOfSubsets", nsubsets)

    odf = wdf.query("oldID=='{0}'".format(ident))
    if odf.empty:
        logging.info(" wigos {0} is empty for ident {1}".format(ident, odf["wigosLocalIdentifierCharacter"].values))
        return obid

    codes_set_array(obid, "unexpandedDescriptors", outUD)
    codes_set(obid, "wigosIdentifierSeries",
              _first_int(odf, "wigosIdentifierSeries"))
    codes_set(obid, "wigosIssuerOfIdentifier",
              _first_int(odf, "wigosIssuerOfIdentifier"))
    codes_set(obid, "wigosIssueNumber",
              _first_int(odf, "wigosIssueNumber"))
    # the local identifier is padded to at least five characters
    wlid = "{0:5}".format(odf["wigosLocalIdentifierCharacter"].values[0])
    logging.info(" wlid here {0}".format(wlid))
    codes_set(obid, "wigosLocalIdentifierCharacter", str(wlid))
    codes_bufr_copy_data(bid, obid)
    return obid


def main():
    """Read the input BUFR file and write it back with the WIGOS info.

    In "web" mode the station list is downloaded and cached as
    oscar.json in the current directory; otherwise the cached oscar.json
    is read. Messages without an ident are logged as rejected and are
    not written to the output file.
    """
    print("ecCodes version {0}".format(codes_get_api_version()))
    args = read_cmd_line()
    logging.basicConfig(filename=args.logfile, level=logging.INFO,
                        filemode="w")

    oscarFile = os.path.join(os.getcwd(), "oscar.json")
    if args.mode == "web":
        jtext = read_oscar_web()
        with open(oscarFile, "w") as f:
            json.dump(jtext, f)
    else:
        jtext = read_oscar_json(oscarFile)
    wigosDf = parse_json_into_dataframe(jtext)

    with open(args.input, "rb") as f, open(args.output, "wb") as fout:
        nmsg = codes_count_in_file(f)
        for i in range(nmsg):
            bid = codes_bufr_new_from_file(f)
            # the output starts as a copy of the input message
            obid = codes_clone(bid)
            try:
                codes_set(bid, "skipExtraKeyAttributes", 1)
                codes_set(bid, "unpack", 1)
                ident = get_ident(bid)
                if ident:
                    logging.info(" \t message {0} ident {1} ".format(i + 1, ident))
                    add_wigos_info(ident, bid, wigosDf, obid)
                    codes_write(obid, fout)
                else:
                    logging.info("message {0} rejected ".format(i + 1))
            finally:
                # release the handles even when a message fails
                codes_release(obid)
                codes_release(bid)
    print(" finished")


if __name__ == '__main__':
    main()
The program can be called with the following arguments
...
The output file contains 22724 messages View file