"""Import routines several ERT file formats."""
import re
import numpy as np
import pygimli as pg


def load(fileName, verbose=False, **kwargs):
    """Shortcut to load ERT data.

    Import data and try to assume the file format. In addition to the
    unified data format, we support the widespread res2dinv format as
    well as ASCII column files generated by the processing software of
    various instruments (ABEM LS, Syscal Pro, Resecs, ...).
    If this fails, install pybert and use its auto importer
    pybert.importData.

    Parameters
    ----------
    fileName: str

    Returns
    -------
    data: pg.DataContainer
    """
    data = pg.load(fileName)

    if isinstance(data, pg.DataContainerERT):
        return data

    try:
        pg.info("could not read unified data format for ERT ... try res2dinv")
        data = importRes2dInv(fileName)
        return data
    except Exception:
        pg.info("could not read res2dinv ... try Ascii columns")
        try:
            data = importAsciiColumns(fileName)
            return data
        except Exception as e:
            pg.info("Failed importing Ascii column file. Consider using pybert.")
            pg.info(e)

    if verbose:
        pg.info("Try to import using pybert .. if available")

    pb = pg.optImport('pybert')
    data = pb.importData(fileName)

    if kwargs.pop('ensureKRhoa', False):
        if not data.haveData('k'):
            data.createGeometricFactors()
        if data.haveData('r'):
            data['rhoa'] = data['r'] * data['k']
        elif data.haveData('u') and data.haveData('i'):
            data['rhoa'] = data['u'] / data['i'] * data['k']

    if isinstance(data, pg.DataContainerERT):
        return data

    pg.critical("Can't import ERT data file.", fileName)

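# Minimal usage sketch for the auto importer above; the file name is
# hypothetical and 'ensureKRhoa' assumes geometric factors can be derived
# from the sensor positions:
#
#   data = load("profile.dat", verbose=True, ensureKRhoa=True)
#   print(data)          # pg.DataContainerERT with a, b, m, n indices
#   print(data['rhoa'])  # apparent resistivities, computed if missing
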
def importRes2dInv(filename, verbose=False, return_header=False):
    """Read res2dinv format file.

    Parameters
    ----------
    filename : str
    verbose : bool [False]
    return_header : bool [False]

    Returns
    -------
    pg.DataContainerERT and (in case of return_header=True)
    header dictionary

    Format
    ------
        str - title
        float - unit spacing [m]
        int - Array Number (1-Wenner, 3-Dipole-dipole atm only)
        int - Number of Datapoints
        float - x-location given in terms of first electrode,
                use 1 if mid-point location is given
        int - 0 for no IP, use 1 if IP present
        str - Phase Angle if IP present
        str - mrad if IP present
        0,90.0 - if IP present
        dataBody
    """
    def getNonEmptyRow(i, comment='#'):
        s = next(i)
        while s[0] == comment:
            s = next(i)
        return s.split('\r\n')[0]
    # def getNonEmptyRow(...)

    with open(filename, 'r') as fi:
        content = fi.readlines()

    it = iter(content)
    header = {}
    header['name'] = getNonEmptyRow(it, comment=';')
    header['spacing'] = float(getNonEmptyRow(it, comment=';'))
    typrow = getNonEmptyRow(it, comment=';')
    typ = int(typrow.rstrip('\n').rstrip('R').rstrip('L'))

    isR = 0  # default: apparent resistivity, not resistance
    if typ == 11:  # independent electrode positions
        header['subtype'] = int(getNonEmptyRow(it, comment=';'))
        header['dummy'] = getNonEmptyRow(it, comment=';')
        isR = int(getNonEmptyRow(it, comment=';'))

    nData = int(getNonEmptyRow(it, comment=';'))
    xLoc = float(getNonEmptyRow(it, comment=';'))
    hasIP = int(getNonEmptyRow(it, comment=';'))

    if hasIP:
        header['ipQuantity'] = getNonEmptyRow(it, comment=';')
        header['ipUnit'] = getNonEmptyRow(it, comment=';')
        header['ipData'] = getNonEmptyRow(it, comment=';')
        ipline = header['ipData'].rstrip('\n').rstrip('\r').split(' ')
        if len(ipline) > 2:  # obviously spectral data?
            header['ipNumGates'] = int(ipline[0])
            header['ipDelay'] = float(ipline[1])
            header['onTime'] = float(ipline[-2])
            header['offTime'] = float(ipline[-1])
            header['ipDT'] = np.array(ipline[2:-2], dtype=float)
            header['ipGateT'] = np.cumsum(np.hstack((header['ipDelay'],
                                                     header['ipDT'])))

    data = pg.DataContainerERT()
    data.resize(nData)

    if typ == 9 or typ == 10:
        raise Exception("Don't know how to read: " + str(typ))

    if typ in [11, 12, 13]:  # mixed array
        res = pg.Vector(nData, 0.0)
        ip = pg.Vector(nData, 0.0)
        specIP = []

        for i in range(nData):
            vals = getNonEmptyRow(it, comment=';').replace(',', ' ').split()

            if int(vals[0]) == 4:  # row starts with number of electrodes
                eaID = data.createSensor(pg.Pos(float(vals[1]),
                                                float(vals[2])))
                ebID = data.createSensor(pg.Pos(float(vals[3]),
                                                float(vals[4])))
                emID = data.createSensor(pg.Pos(float(vals[5]),
                                                float(vals[6])))
                enID = data.createSensor(pg.Pos(float(vals[7]),
                                                float(vals[8])))
            elif int(vals[0]) == 3:
                eaID = data.createSensor(pg.Pos(float(vals[1]),
                                                float(vals[2])))
                ebID = -1
                emID = data.createSensor(pg.Pos(float(vals[3]),
                                                float(vals[4])))
                enID = data.createSensor(pg.Pos(float(vals[5]),
                                                float(vals[6])))
            elif int(vals[0]) == 2:
                eaID = data.createSensor(pg.Pos(float(vals[1]),
                                                float(vals[2])))
                ebID = -1
                emID = data.createSensor(pg.Pos(float(vals[3]),
                                                float(vals[4])))
                enID = -1
            else:
                raise Exception("Don't know how to handle row", vals[0])

            res[i] = float(vals[int(vals[0])*2+1])

            if hasIP:
                ipCol = int(vals[0])*2 + 2
                ip[i] = float(vals[ipCol])
                if 'ipNumGates' in header:
                    specIP.append(vals[ipCol:])

            data.createFourPointData(i, eaID, ebID, emID, enID)

        if isR:
            data.set('r', res)
        else:
            data.set('rhoa', res)

        if hasIP:
            data.set('ip', ip)
            if 'ipNumGates' in header:
                A = np.array(specIP, dtype=float)
                A[A > 1000] = -999
                A[A < -1000] = -999
                for i in range(header['ipNumGates']):
                    data.set('ip' + str(i+1), A[:, i])
    else:  # not type 11-13
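        # For the standard arrays handled below, electrode positions are
        # reconstructed from the datum location XX, the unit spacing EL and
        # an optional separation factor SP. xLoc encodes how XX is given:
        # 0 for the first-electrode position, 1 for the array midpoint.
        # E.g. Wenner (typ 1) with xLoc=1 and spacing a yields
        # A = x - 1.5a, M = x - 0.5a, N = x + 0.5a, B = x + 1.5a.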
        # number of data columns for each array type
        nntyp = [0, 3, 3, 4, 3, 3, 4, 4, 3, 0, 0, 8, 10]
        nn = nntyp[typ] + hasIP  # number of columns

        dataBody = np.zeros((nn, nData))

        for i in range(nData):
            vals = getNonEmptyRow(it, comment=';').replace(',', ' ').split()
            dataBody[:, i] = np.array(vals, dtype=float)

        XX = dataBody[0]
        EL = dataBody[1]
        SP = pg.Vector(nData, 1.0)

        if nn - hasIP == 4:
            SP = dataBody[2]

        AA = None
        BB = None
        NN = None
        MM = None

        if typ == 1:  # Wenner
            AA = XX - xLoc * EL * 1.5
            MM = AA + EL
            NN = MM + EL
            BB = NN + EL
        elif typ == 2:  # Pole-Pole
            AA = XX - xLoc * EL * 0.5
            MM = AA + EL
        elif typ == 3:  # Dipole-Dipole
            AA = XX - xLoc * EL * (SP / 2. + 1.)
            BB = AA + EL
            MM = BB + SP * EL
            NN = MM + EL
        elif typ == 4:  # WENNER-BETA
            AA = XX - xLoc * EL * 1.5
            BB = AA + EL
            MM = BB + EL
            NN = MM + EL
        elif typ == 5:  # WENNER-GAMMA
            AA = XX - xLoc * EL * 1.5
            MM = AA + EL
            BB = MM + EL
            NN = BB + EL
        elif typ == 6:  # POLE-DIPOLE
            AA = XX - xLoc * SP * EL - (SP - 1.) * (SP < 0.) * EL
            MM = AA + SP * EL
            NN = MM + np.sign(SP) * EL
        elif typ == 7:  # SCHLUMBERGER
            AA = XX - xLoc * EL * (SP + 0.5)
            MM = AA + SP * EL
            NN = MM + EL
            BB = NN + SP * EL
        else:
            raise Exception('Datatype ' + str(typ) + ' not yet supported')

        for i in range(len(AA)):
            if AA is not None:
                eaID = data.createSensor(pg.Pos(AA[i], 0.0))
            else:
                eaID = -1

            if BB is not None:
                ebID = data.createSensor(pg.Pos(BB[i], 0.0))
            else:
                ebID = -1

            if MM is not None:
                emID = data.createSensor(pg.Pos(MM[i], 0.0))
            else:
                emID = -1

            if NN is not None:
                enID = data.createSensor(pg.Pos(NN[i], 0.0))
            else:
                enID = -1

            data.createFourPointData(i, eaID, ebID, emID, enID)

        data.set('rhoa', dataBody[nn - hasIP - 1])
        if hasIP:
            data.set('ip', dataBody[nn - 1])

    row = getNonEmptyRow(it, comment=';')
    if row.lower().startswith('topography'):
        row = getNonEmptyRow(it, comment=';')
    istopo = int(row)
    if istopo:
        ntopo = int(getNonEmptyRow(it, comment=';'))
        ap = data.additionalPoints()
        for i in range(ntopo):
            strs = getNonEmptyRow(it, comment=';').replace(',', ' ').split()
            ap.push_back(pg.Pos([float(s) for s in strs]))
        data.setAdditionalPoints(ap)

    data.sortSensorsX()
    data.sortSensorsIndex()

    if return_header:
        return data, header
    else:
        return data
# def importRes2dInv(...)

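# Usage sketch (file name hypothetical); the header dictionary is only
# returned when return_header=True:
#
#   data, hdr = importRes2dInv("wenner.dat", return_header=True)
#   print(hdr['spacing'], max(data['rhoa']))
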
def importAsciiColumns(filename, verbose=False, return_header=False):
    """Import any ERT data file organized in columns with column header.

    Input can be:

    * Terrameter LS or SAS Ascii Export format, e.g.

      Time MeasID DPID Channel A(x) A(y) A(z) B(x) B(y) B(z) M(x) M(y) M(z)
      N(x) N(y) N(z) F(x) F(y) F(z) Note I(mA) Uout(V) U(V) SP(V) R(O)
      Var(%) Rhoa Cycles Pint Pext(V) T(°C) Lat Long
      2016-09-14 07:01:56 73 7 1 8 1 1 20 1 1 12 1 1 16 1 1 14 1
      2.076 99.8757 107.892 0.0920761 0 0.921907 0.196302 23.17 1
      12.1679 12.425 42.1962 0 0

    * Resecs output format
    """
    data = pg.DataContainerERT()
    header = {}
    with open(filename, 'r', encoding='iso-8859-15') as fi:
        content = fi.readlines()

    if content[0].startswith('Injection'):  # Resecs lead-in
        for n in range(20):
            if len(content[n]) < 2:
                break
        content = content[n+1:]

    if content[0].startswith('Filename'):  # ABEM lead-in
        for n, line in enumerate(content):
            if line.find("MeasID") >= 0:
                break

        for i in range(n):
            sp = content[i].split(":")
            if len(sp) > 1:
                tok = sp[0].lstrip("\t").lstrip("- ")
                header[tok] = sp[1].rstrip("\n").rstrip("\r")

        for last in range(len(content)-1, -1, -1):
            if content[last].find("---") == 0:
                print(content[last])
                last -= 1
                print(content[last])
                while len(content[last]) < 3:
                    last -= 1
                last += 1
                break

        if last <= 1:
            last = len(content)

        content = content[n:last]

    d = readAsDictionary(content, sep='\t')
    if len(d) < 2:
        d = readAsDictionary(content)

    nData = len(next(iter(d.values())))
    data.resize(nData)

    if 'Spa.1' in d:  # Syscal Pro
        abmn = ['Spa.1', 'Spa.2', 'Spa.3', 'Spa.4']
        if verbose:
            pg.debug("detected Syscal file format")
    elif 'A(x)' in d:  # ABEM Terrameter
        abmn = ['A', 'B', 'M', 'N']
        if verbose:
            pg.debug("detected ABEM file format")
    elif 'xA' in d:  # Workbench TX2 processed data
        abmn = ['xA', 'xB', 'xM', 'xN']
        if verbose:
            pg.debug("detected Workbench file format")
    elif 'C1(x)' in d or 'C1(xm)' in d:  # Resecs
        abmn = ['C1', 'C2', 'P1', 'P2']
        if verbose:
            pg.debug("detected RESECS file format")
    else:
        pg.debug("no electrode positions found!")
        pg.debug("Keys are:", d.keys())
        raise Exception("No electrode positions found!")

    for i in range(nData):
        if abmn[0]+'(z)' in d:
            eID = [data.createSensor([d[se+'(x)'][i], d[se+'(y)'][i],
                                      d[se+'(z)'][i]]) for se in abmn]
        elif abmn[0]+'(zm)' in d:
            eID = [data.createSensor([d[se+'(xm)'][i], d[se+'(ym)'][i],
                                      d[se+'(zm)'][i]]) for se in abmn]
        elif abmn[0]+'(y)' in d:
            eID = [data.createSensor([d[se+'(x)'][i], d[se+'(y)'][i], 0.])
                   for se in abmn]
        elif abmn[0]+'(ym)' in d:
            eID = [data.createSensor([d[se+'(xm)'][i], d[se+'(ym)'][i], 0.])
                   for se in abmn]
        elif abmn[0]+'(x)' in d:
            eID = [data.createSensor([d[se+'(x)'][i], 0., 0.])
                   for se in abmn]
        elif abmn[0]+'(xm)' in d:
            eID = [data.createSensor([d[se+'(xm)'][i], 0., 0.])
                   for se in abmn]
        else:
            eID = [data.createSensor([d[se][i], 0., 0.]) for se in abmn]

        data.createFourPointData(i, *eID)
    # data.save('tmp.shm', 'a b m n')

    tokenmap = {'I(mA)': 'i', 'I': 'i', 'In': 'i',
                'Vp': 'u', 'VoltageV': 'u', 'U': 'u', 'U(V)': 'u', 'UV': 'u',
                'R(Ohm)': 'r', 'RO': 'r', 'R(O)': 'r', 'Res': 'r',
                'Rho': 'rhoa', 'AppROhmm': 'rhoa', 'Rho-a(Ohm-m)': 'rhoa',
                'Rho-a(Om)': 'rhoa',
                'Var(%)': 'err', 'D': 'err', 'Dev.': 'err', 'Dev': 'err',
                'M': 'ma', 'P': 'ip', 'IP sum window': 'ip', 'Time': 't'}

    # unit conversions (mA, mV, %), partly automatically assumed
    unitmap = {'I(mA)': 1e-3, 'Var(%)': 0.01,          # ABEM
               'U': 1e-3, 'I': 1e-3, 'D': 0.01,        # Resecs
               'Dev.': 0.01, 'In': 1e-3, 'Vp': 1e-3}   # Syscal

    abmn = ['a', 'b', 'm', 'n']
    if 'Cycles' in d:
        d['stacks'] = d['Cycles']
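    # The loop below copies every numeric column into the container: known
    # instrument headers are renamed via tokenmap (e.g. 'I(mA)' -> 'i') and
    # rescaled to SI units via unitmap (e.g. mA -> A by factor 1e-3);
    # unknown numeric columns keep their name with spaces replaced by
    # underscores, while coordinate-like columns (containing x, y or z)
    # are skipped.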
    for key in d.keys():
        vals = np.asarray(d[key])
        if key.startswith('IP sum window'):  # there is a trailing number
            key = 'IP sum window'  # apparently not working

        if np.issubdtype(vals.dtype, np.floating) or \
                np.issubdtype(vals.dtype, np.signedinteger):  # float or int
            if key in tokenmap:  # use the standard (i, u, rhoa) key
                if key not in abmn:
                    if verbose:
                        pg.debug("Setting", tokenmap[key], "from", key)
                    data.set(tokenmap[key], vals * unitmap.get(key, 1.0))
            else:  # use the original key if not XX(x) etc.
                if not re.search('([x-z])', key) and key not in abmn:
                    data.set(key.replace(' ', '_'), d[key])

    if data.haveData('u') and data.haveData('i'):
        r = data['u'] / data['i']
        if 'R(O)' in d:
            if np.linalg.norm(r - d['R(O)']) < 1e4:  # no idea what that's for
                data.set('r', r)
            else:
                pg.debug("Warning! File inconsistent")

    data.sortSensorsX()

    if return_header:
        return data, header
    else:
        return data
# def importAsciiColumns(...)


def readAsDictionary(content, token=None, sep=None):  # obsolete due to numpy?
    """Read list of strings from a file as column-separated dictionary.

    e.g.
        token1 token2 token3 token4
        val1   val2   val3   val4
        val1   val2   val3   val4
        val1   val2   val3   val4

    Parameters
    ----------
    content: [string]
        List of strings read from a file, e.g.

        with open(filename, 'r') as fi:
            content = fi.readlines()

    token: [string]
        If given, the tokens will be the keys of the resulting dictionary.
        When token is None, tokens will be the first row values.
        When token is an empty list, the tokens will be autonamed to
        'col' + str(colNumber).

    Returns
    -------
    ret: dictionary
        Dictionary of all data.
    """
    data = dict()

    if token is None:
        header = content[0].splitlines()[0].split(sep)
        token = []
        for i, tok in enumerate(header):
            tok = tok.lstrip()
            token.append(tok)

    for i, row in enumerate(content[1:]):
        vals = row.splitlines()[0].split(sep)
        for j, v in enumerate(vals):
            v = v.replace(',', '.')
            if len(token) < j+1:
                token.append('col' + str(j))
            if token[j] not in data:
                data[token[j]] = [None] * (len(content)-1)
            try:
                data[token[j]][i] = float(v)
            except Exception:
                if len(v) == 1 and v[0] == '-':
                    v = 0.0
                data[token[j]][i] = v

    return data


if __name__ == "__main__":
    pass
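# Illustrative example for readAsDictionary (input made up): column names
# are taken from the first row, decimal commas become points and values
# are converted to float where possible:
#
#   d = readAsDictionary(["A\tB\n", "1,5\t2\n", "3\t4\n"], sep='\t')
#   # -> {'A': [1.5, 3.0], 'B': [2.0, 4.0]}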