gpx_reduce.py
changeset 0 8f4668cd868b
child 1 5467c893d121
equal deleted inserted replaced
-1:000000000000 0:8f4668cd868b
       
     1 #!/usr/bin/env python
       
     2 # -*- coding: utf8 -*-
       
     3 
       
     4 '''
       
     5 gpx_reduce v1.8: removes points from gpx-files to reduce filesize and
       
     6 tries to keep introduced distortions to the track at a minimum.
       
     7 Copyright (C) 2011,2012,2013,2015,2016,2017 travelling_salesman on OpenStreetMap
       
     8 
       
     9 changelog: v1.2: clarity refractoring + speedup for identical points
       
    10            v1.3: new track weighting functions, progress display
       
    11            v1.4: new track weighting function, restructuring for memory saving
       
    12            v1.5: algorithm speedup by roughly a factor of 2 by eliminating some cases.
       
    13            v1.6: presets for train etc.
       
    14            v1.7: introduced weighting function for elevation errors
       
    15            v1.8: speed-dependent distance limit
       
    16 
       
    17 This program is free software: you can redistribute it and/or modify
       
    18 it under the terms of the GNU General Public License as published by
       
    19 the Free Software Foundation, either version 3 of the License, or
       
    20 (at your option) any later version.
       
    21 
       
    22 This program is distributed in the hope that it will be useful,
       
    23 but WITHOUT ANY WARRANTY; without even the implied warranty of
       
    24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
       
    25 GNU General Public License for more details.
       
    26 
       
    27 You should have received a copy of the GNU General Public License
       
    28 along with this program.  If not, see <http://www.gnu.org/licenses/>.
       
    29 '''
       
    30 
       
    31 import datetime
       
    32 import sys
       
    33 import time
       
    34 import pylab as pl
       
    35 import scipy as sc
       
    36 import numpy.linalg as la
       
    37 from math import *
       
    38 from lxml import etree
       
    39 from optparse import OptionParser
       
    40 
       
    41 
       
    42 parser = OptionParser('usage: %prog [options] input-file.gpx')
       
    43 parser.add_option('-v', '--verbose', action='store', type='int',
       
    44     dest='verbose', default=1, help='verbose=[0,1]')
       
    45 parser.add_option('-p', '--plot', action='store_true', dest='plot',
       
    46     default=False, help='Show a plot of the result at the end.')
       
    47 parser.add_option('-d', '--dist', action='store', type='float', dest='max_dist',
       
    48     default=0.5, help='Maximum distance of line from original points in meters')
       
    49 parser.add_option('-o', '--out', action='store', type='string',
       
    50     dest='ofname', default=None, help='Output file name')
       
    51 parser.add_option('-m', '--maxsep', action='store', type='float', dest='max_sep',
       
    52     default=200.0, help='Absolute maximum separation of points. No points will be deleted where the resulting distance would become greater than maxsep. Standard JOSM settings will not display points spaced more than 200m. -1 means no limit.')
       
    53 parser.add_option('-n', '--maxsep0', action='store', type='float', dest='max_sep0',
       
    54     default=4.0, help='Maximum separation of points at zero speed.')
       
    55 parser.add_option('-t', '--maxseptime', action='store', type='float', dest='max_sep_time',
       
    56     default=3.0, help='Maximum time separation of points, which will be added to maxsep0. Maximum allowed point separation will be min(m, n + t*v) where v is the speed in m/s.')
       
    57 parser.add_option('-e', '--ele-weight', action='store', type='float',
       
    58     dest='ele_weight', default=0.0,
       
    59     help='Weighting of elevation errors vs. horizontal errors. Default is 0.')
       
    60 parser.add_option('-b', '--bend', action='store', type='float', dest='bend',
       
    61     default=1.0, help='Penalty value for large bending angles at each trackpoint. Larger values (1 or more) make the track smoother.')
       
    62 parser.add_option('-w', '--weighting', action='store', type='string',
       
    63     dest='weighting', default='exp',
       
    64     help='''Weighting function to be minimized for track reduction:
       
    65 pnum (number of points),
       
    66 sqrdistsum (number of points plus sum of squared distances to leftout points),
       
    67 sqrdistmax (number of points plus sum of squared distances to each maximally separated leftout point per new line segment),
       
    68 sqrlength (number of points plus sum of squared new line segment lengths normalized by maxsep),
       
    69 mix (number of points plus sum of squared distances to each maximally separated leftout point per new line segment weighted with corresponding segment length),
       
    70 exp (number of points plus sum of squared distances to leftout points with exponential weighting of 1/2, 1/4, 1/8... from furthest to closest point). exp=standard''')
       
    71 (options, args) = parser.parse_args()
       
    72 
       
    73 
       
    74 if len(args) < 1:
       
    75     parser.print_usage()
       
    76     exit(2)
       
    77 
       
    78 
       
    79 # use the WGS-84 ellipsoid
       
    80 rE = 6356752.314245 # earth's radius
       
    81 a = 6378137.0
       
    82 b = 6356752.314245179
       
    83 
       
    84 timeformat = '%Y-%m-%dT%H:%M:%SZ'
       
    85 
       
    86 
       
    87 def distance(p1_, pm_, p2_, ele_weight=1.0):
       
    88     # returns distance of pm from line between p1 and p2
       
    89 
       
    90     p1, pm, p2 = sc.array(p1_), sc.array(pm_), sc.array(p2_)
       
    91     h1, hm, h2 = la.norm(p1), la.norm(pm), la.norm(p2)
       
    92     if ele_weight != 1.0 and min(h1, hm, h2) > 0.0:
       
    93         hmean = (h1 + hm + h2) / 3.0
       
    94         p1 *= (ele_weight + (1.0 - ele_weight) * hmean / h1)
       
    95         pm *= (ele_weight + (1.0 - ele_weight) * hmean / hm)
       
    96         p2 *= (ele_weight + (1.0 - ele_weight) * hmean / h2)
       
    97     line = p2 - p1
       
    98     linel = la.norm(line)
       
    99     vm = pm - p1
       
   100     if linel == 0.0:
       
   101         return la.norm(vm)
       
   102     linem = line / linel
       
   103     
       
   104     position = pl.dot(vm, linem) / linel
       
   105     if position < 0.0:
       
   106         return la.norm(vm)
       
   107     elif position > 1.0:
       
   108         return la.norm(pm - p2)
       
   109     else:
       
   110         return la.norm(vm - line * position)
       
   111 
       
   112 
       
   113 def rotate(x, y, phi):
       
   114     return x*cos(phi) - y*sin(phi), x*sin(phi) + y*cos(phi)
       
   115 
       
   116 
       
   117 def project_to_meters(lat, lon, latm, lonm):
       
   118     # azimuthal map projection centered at average track coordinate
       
   119     lon -= lonm
       
   120     xyz = latlonele_to_xyz(lat, lon, 0.0)
       
   121     zy = rotate(xyz[2], xyz[1], radians(90 - latm))
       
   122     lat2 = atan2(zy[0], la.norm([zy[1], xyz[0]]))
       
   123     lon2 = atan2(xyz[0], -zy[1])
       
   124     x_meters = rE * sin(lon2) * (pi / 2.0 - lat2)
       
   125     y_meters = -rE * cos(lon2) * (pi / 2.0 - lat2)
       
   126     return x_meters, y_meters
       
   127 
       
   128 
       
   129 def latlonele_to_xyz(lat, lon, ele):
       
   130     s = sin(radians(lat))
       
   131     c = cos(radians(lat))
       
   132     r = ele + a * b / la.norm([s*a, c*b])
       
   133     lon = radians(lon)
       
   134     return r * c * sin(lon), r * c * (-cos(lon)), r * s
       
   135 
       
   136 
       
   137 def xyz_to_latlonele(x, y, z):
       
   138     r = la.norm([x, y, z])
       
   139     if (r == 0):
       
   140         return 0.0, 0.0, 0.0
       
   141     lat = degrees(atan2(z, la.norm([x, y])))
       
   142     lon = degrees(atan2(x, -y))
       
   143     ele = r * (1.0 - a * b / la.norm([a*z, b*x, b*y]))
       
   144     return lat, lon, ele
       
   145 
       
   146 
       
   147 def reduced_track_indices(coordinate_list, timesteps=None):
       
   148     # returns a list of indices of trackpoints that constitute the reduced track
       
   149     # takes a list of kartesian coordinate tuples
       
   150     m = len(coordinate_list)
       
   151     if (m == 0): return []
       
   152     if timesteps != None and len(timesteps) != len(coordinate_list):
       
   153         timesteps = None
       
   154     
       
   155     # number of dimensions
       
   156     d = len(coordinate_list[0])
       
   157     
       
   158     # remove identical entries (can speed up algorithm considerably)
       
   159     original_indices = [0]
       
   160     points = [{'p': coordinate_list[0], 'weight':1}]
       
   161     if timesteps != None: points[0]['t'] = timesteps[0]
       
   162     for i in range(1, m):
       
   163         if False in [coordinate_list[i-1][j] == coordinate_list[i][j] for j in range(d)]:
       
   164             original_indices.append(i)
       
   165             points.append({'p': coordinate_list[i], 'weight':1})
       
   166             if timesteps != None: points[-1]['t'] = timesteps[i]
       
   167         else:
       
   168             points[-1]['weight'] += 1
       
   169     n = len(points)
       
   170     
       
   171     # progress printing initialisations
       
   172     progress_printed = False
       
   173     progress = None
       
   174     tprint = time.time()
       
   175     
       
   176     # execute Dijkstra-like algorithm on points
       
   177     points[0]['cost'] = 1.0
       
   178     points[0]['prev'] = -1
       
   179     
       
   180     for i2 in range(1, n):
       
   181         penalties = {}
       
   182         imin = None
       
   183         costmin = float('inf')
       
   184         for i1 in reversed(range(i2)):
       
   185             p1 = sc.array(points[i1]['p'])
       
   186             p2 = sc.array(points[i2]['p'])
       
   187             seglength = la.norm(p2 - p1)
       
   188             
       
   189             # estimate speed between p1 and p2
       
   190             if timesteps != None:
       
   191                 dt = (points[i2]['t'] - points[i1]['t']).total_seconds()
       
   192                 v = seglength / max(0.1, dt)
       
   193             else:
       
   194                 v = seglength / float(i2 - i1) # assume 1s time spacing
       
   195             
       
   196             max_sep = options.max_sep0 + v * options.max_sep_time
       
   197             if options.max_dist >= 0:
       
   198                 max_sep = min(max_sep, options.max_sep)
       
   199             
       
   200             if (seglength >= max_sep and i1 != i2 - 1):
       
   201                 # point separation is too far
       
   202                 # but always accept direct predecessor i1 = i2 - 1
       
   203                 if (seglength >= max_sep + options.max_dist):
       
   204                     # no chance to find a valid earlier predecessor point
       
   205                     break
       
   206                 else:
       
   207                     continue
       
   208             
       
   209             if points[i1]['cost'] + 1.0 > costmin:
       
   210                 # the possible predecessor i1 is already too bad.
       
   211                 continue
       
   212             
       
   213             i1_i2_segment_valid = True
       
   214             lower_i1_possible = True
       
   215             distance_squaremax = 0.0
       
   216             distance_squaresum = 0.0
       
   217             distances_squared = []
       
   218             # iterate all medium points between i1 and i2
       
   219             for im in range(i1+1, i2):
       
   220                 pm = sc.array(points[im]['p'])
       
   221                 d = distance(p1, pm, p2, options.ele_weight)
       
   222                 if (d <= options.max_dist):
       
   223                     d_sq = (d / options.max_dist) ** 2
       
   224                     distance_squaremax = max(distance_squaremax, d_sq)
       
   225                     distance_squaresum += points[im]['weight'] * d_sq
       
   226                     distances_squared.append(d_sq)
       
   227                 else:
       
   228                     i1_i2_segment_valid = False
       
   229                 
       
   230                     # check if connection to any further point i1 is impossible
       
   231                     d1 = pl.dot(p1 - p2, p1 - p2)
       
   232                     d2 = pl.dot(pm - p2, pm - p2)
       
   233                     dd = options.max_dist ** 2
       
   234                     d1d2 = pl.dot(p1 - p2, pm - p2)
       
   235                     # formula from cosines of point separation angle and cone-opening angles around points
       
   236                     if (d1 > dd and d2 > dd and (d1d2 + dd)**2 < (d2 - dd) * (d1 - dd)):
       
   237                         lower_i1_possible = False
       
   238                         break
       
   239             
       
   240             if (lower_i1_possible == False):
       
   241                 break
       
   242             
       
   243             if i1_i2_segment_valid:
       
   244                 if options.weighting == 'sqrdistmax':
       
   245                     penalties[i1] = distance_squaremax
       
   246                 elif options.weighting == 'sqrdistsum':
       
   247                     penalties[i1] = distance_squaresum
       
   248                 elif options.weighting == 'sqrlength':
       
   249                     penalties[i1] = (seglength / max_sep) ** 2
       
   250                 elif options.weighting == 'mix':
       
   251                     penalties[i1] = (distance_squaremax * (1.0 + seglength / max_sep))
       
   252                 elif options.weighting == 'exp':
       
   253                     penalties[i1] = 0.5 * sum([0.5**i * d for i, d in
       
   254                         enumerate(sorted(distances_squared, reverse=True))])
       
   255                 else:
       
   256                     penalties[i1] = 0.0
       
   257                 
       
   258                 # add a penalty for kinks
       
   259                 if options.bend > 0.:
       
   260                     if points[i1]['prev'] != -1:
       
   261                         p0 = sc.array(points[points[i1]['prev']]['p'])
       
   262                         v0 = p1 - p0
       
   263                         v1 = p2 - p1
       
   264                         if la.norm(v0) > 0. and la.norm(v1) > 0.:
       
   265                             v0 /= la.norm(v0)
       
   266                             v1 /= la.norm(v1)
       
   267                             kink = (1.0 - sc.dot(v0, v1)) / 2.0
       
   268                             penalties[i1] += options.bend * kink
       
   269         
       
   270         # find best predecessor
       
   271         imin = None
       
   272         costmin = float('inf')
       
   273         for prev, penalty in penalties.iteritems():
       
   274             # cost function is sum of points used (1.0) plus penalties
       
   275             cost = points[prev]['cost'] + 1.0 + penalty
       
   276             if cost < costmin:
       
   277                 imin = prev
       
   278                 costmin = cost
       
   279         points[i2]['cost'] = costmin
       
   280         points[i2]['prev'] = imin
       
   281         
       
   282         # print progess
       
   283         if options.verbose == 1 and (100 * i2) / n > progress and time.time() >= tprint + 1:
       
   284             tprint = time.time()
       
   285             progress = (100 * i2) / n
       
   286             print '\r', progress, '%',
       
   287             sys.stdout.flush()
       
   288             progress_printed = True
       
   289     
       
   290     if progress_printed:
       
   291         print '\r',
       
   292     
       
   293     # trace route backwards to collect final points
       
   294     final_pnums = []
       
   295     i = n-1
       
   296     while i >= 0:
       
   297         final_pnums = [i] + final_pnums
       
   298         i = points[i]['prev']
       
   299     
       
   300     return [original_indices[i] for i in final_pnums]
       
   301 
       
   302 
       
   303 
       
   304 ############################## main function #################################
       
   305 for fname in args:
       
   306     # initialisations
       
   307     tracksegs_old = []
       
   308     tracksegs_new = []
       
   309     sumx, sumy, sumz = 0.0, 0.0, 0.0
       
   310     
       
   311     # import xml data from files
       
   312     print 'opening file', fname
       
   313     infile = open(fname)
       
   314     
       
   315     tree = etree.parse(infile)
       
   316     infile.close()
       
   317     gpx = tree.getroot()
       
   318     if gpx.nsmap.has_key(None):
       
   319         nsmap = '{' + gpx.nsmap[None] + '}'
       
   320     else:
       
   321         nsmap = ''
       
   322     
       
   323     # extract data from xml
       
   324     for trkseg in gpx.findall('.//' + nsmap + 'trkseg'):
       
   325         trkpts = trkseg.findall(nsmap + 'trkpt')
       
   326         n = len(trkpts)
       
   327         
       
   328         # extract coordinate values
       
   329         lats = [float(trkpt.get('lat')) for trkpt in trkpts]
       
   330         lons = [float(trkpt.get('lon')) for trkpt in trkpts]
       
   331         eles = [float(trkpt.find(nsmap + 'ele').text) for trkpt in trkpts]
       
   332         try:
       
   333             times = [datetime.datetime.strptime(trkpt.find(nsmap + 'time'
       
   334                                        ).text, timeformat) for trkpt in trkpts]
       
   335         except Exception as e:
       
   336             print e
       
   337             times = None
       
   338         
       
   339         # save original trackseg for plotting
       
   340         if options.plot:
       
   341             tracksegs_old.append([[lats[i], lons[i], eles[i]] for i in range(n)])
       
   342         
       
   343         # calculate projected points to work on
       
   344         coords = []
       
   345         for i in range(n):
       
   346             x, y, z = latlonele_to_xyz(lats[i], lons[i], eles[i])
       
   347             coords.append((x, y, z))
       
   348             sumx += x
       
   349             sumy += y
       
   350             sumz += z
       
   351         
       
   352         # execute the reduction algorithm
       
   353         final_pnums = reduced_track_indices(coords, times)
       
   354         
       
   355         n_new = len(final_pnums)
       
   356         print 'number of points:', n, '-', n - n_new, '=', n_new
       
   357         
       
   358         # delete certain points from original data
       
   359         delete_pnums = [i for i in range(n) if i not in final_pnums]
       
   360         for i in reversed(delete_pnums):
       
   361             del trkseg[trkseg.index(trkpts[i])]
       
   362         
       
   363         # save reduced trackseg for plotting
       
   364         if options.plot:
       
   365             tracksegs_new.append([[float(trkpt.get('lat')),
       
   366                 float(trkpt.get('lon')), float(trkpt.find(nsmap + 'ele').text)]
       
   367                 for trkpt in trkseg.findall(nsmap + 'trkpt')])
       
   368         
       
   369     
       
   370     # export data to file
       
   371     if options.ofname != None:
       
   372         ofname = options.ofname
       
   373     elif fname.endswith('.gpx'):
       
   374         ofname = fname[:-4] + '_reduced.gpx'
       
   375     else:
       
   376         ofname = fname + '_reduced.gpx'
       
   377     outfile = open(ofname, 'w')
       
   378     outfile.write(etree.tostring(tree, xml_declaration=True,
       
   379         pretty_print=True, encoding='utf-8'))
       
   380     outfile.close()
       
   381     print 'modified copy written to', ofname
       
   382     
       
   383     
       
   384     # plot result to screen
       
   385     if options.plot:
       
   386         latm, lonm, elesum = xyz_to_latlonele(sumx, sumy, sumz)
       
   387 
       
   388         for trkseg in tracksegs_old:
       
   389             y_old = []
       
   390             x_old = []
       
   391             for trkpt in trkseg:
       
   392                 xy = project_to_meters(trkpt[0], trkpt[1], latm, lonm)
       
   393                 x_old.append(xy[0])
       
   394                 y_old.append(xy[1])
       
   395             pl.plot(x_old, y_old, 'r.-')
       
   396         
       
   397         for trkseg in tracksegs_new:
       
   398             y_new = []
       
   399             x_new = []
       
   400             for trkpt in trkseg:
       
   401                 xy = project_to_meters(trkpt[0], trkpt[1], latm, lonm)
       
   402                 x_new.append(xy[0])
       
   403                 y_new.append(xy[1])
       
   404             pl.plot(x_new, y_new, 'b.-')
       
   405         pl.grid()
       
   406         pl.gca().set_aspect('equal')
       
   407         pl.xlabel('x [m]')
       
   408         pl.ylabel('y [m]')
       
   409         pl.show()