注解

此笔记本可在此处下载: 04_Derivation_of_data_with_noise.ipynb

有噪声数据的推导

代码作者:Emile Roux emile.roux@univ-smb.fr

RISE 幻灯片

范围

  • 有限数 \(N\) 数据点数量 \((x,y)\) 这些数据包括噪声:计算导数 \(\dfrac{{dx}}{{dy}}\)

# Setup
%load_ext autoreload
%matplotlib nbagg
%autoreload 2
import numpy as np
import pandas as pd
import matplotlib as mpl
from PIL import Image # On charge Python Image Library
from matplotlib import pyplot as plt # On charge pyplot (un sous module de Matplotlib) et on le renomme plt
from matplotlib import cm
from scipy import ndimage
from scipy import interpolate
from scipy import signal

import gpxpy
import gpxpy.gpx
from geopy.distance import great_circle, vincenty

介绍

针对阿尔卑斯山徒步旅行GPS数据存在的噪声数据导数竞争问题,提出了一种分析方法。

GPS定位受到噪声的影响,这种噪声是由于GPS的精度造成的。

用于读取数据窗体的类 Earthexplorer

这个类处理来自 Earthexplorer (需要登录)。支持的格式为:数字高程/aster全局DEM,采用geotiff格式1pix=1弧秒

class ElevMap:
    """
    This class manipulates elavation map comming from https://earthexplorer.usgs.gov/.
    The supported format is : digital  elevation / SRMT 1 arc-seconde global
    Where 1pix = 1 arc second
    """
    def __init__(self, file_name):
        self.file_name = file_name
        self.read_elevation_from_file()


    #https://librenepal.com/article/reading-srtm-data-with-python/
    def read_elevation_from_file(self):
        SAMPLES = 3601 # Change this to 3601 for SRTM1
        with open(self.file_name, 'rb') as hgt_data:
            # Each data is 16bit signed integer(i2) - big endian(>)
            elevations = np.fromfile(hgt_data, np.dtype('i2'), SAMPLES*SAMPLES).reshape((SAMPLES, SAMPLES))

        self.elevations = elevations[4:,4:].astype(np.float64)

        # Creat the grid of coordonee associated with the elevation map
        Nx,Ny = self.elevations.shape
        cor=self.file_name.split("_")[-2]
        N=float(cor[1:3])
        E=float(cor[4:])
        x = np.linspace(E, E + 1, Nx)
        y = np.linspace(N + 1, N, Ny)
        self.Xg, self.Yg = np.meshgrid(x, y)

        self.aspect = great_circle((self.Yg[0,0] ,self.Xg[0,0]) , (self.Yg[-1,0] ,self.Xg[-1,0])).km / great_circle((self.Yg[0,0] ,self.Xg[0,0]) , (self.Yg[0,-1] ,self.Xg[0,-1])).km


    def plot(self,ax):
        cs = ax.contourf(self.Xg, self.Yg, self.elevations , vmin=0, vmax=4808)
        ax.set_aspect(aspect=self.aspect)
        return cs


    def plot_track(self,ax, df):
        mask = np.ones(self.Xg.shape)

        large = .03
        mask[np.where(self.Xg  < df.long.min() - large  )]=np.nan
        mask[np.where(self.Xg  > df.long.max() + large )]=np.nan

        mask[np.where(self.Yg < df.lat.min() - large )]=np.nan
        mask[np.where(self.Yg > df.lat.max() + large )]=np.nan

        cs = ax.contourf(self.Xg , self.Yg , self.elevations*mask )
        plt.plot(df.long,df.lat,'r',label = "path")
        ax.set_aspect(aspect=self.aspect)

        ax.set_xlim(df.long.min() - large  , df.long.max() + large )
        ax.set_ylim(df.lat.min()  - large  , df.lat.max()  + large )

        return cs

一些城市要在地图上绘制

site_coord=np.array([[6.865133,45.832634],[ 6.157845,45.919490],[6.389560,45.663787], [6.864840,45.916684],[6.633973,45.935047],[6.672323,45.200746],[ 6.425097, 45.904318]])
site_name = ["Mont Blanc", "Polytech", "Albertville","Chamonix",'Sallanches', "Modane","La Cluzas"]

读取地图高程

M1 = ElevMap('.\_DATA\ASTGTM2_N45E006_dem.tif')
#M2 = ElevMap('.\_DATA\ASTGTM2_N46E007_dem.tif')
#M3 = ElevMap('.\_DATA\ASTGTM2_N45E007_dem.tif')
#M4 = ElevMap('.\_DATA\ASTGTM2_N46E006_dem.tif')

绘制数据

fig = plt.figure()
ax = fig.gca()
#M2.plot(ax)
#M3.plot(ax)
#M4.plot(ax)
cs=M1.plot(ax)

cbar = fig.colorbar(cs)
cbar.set_label('Altitude $m$')   # On specifie le label en z

ax.plot(site_coord[:,0],site_coord[:,1],'r+',markersize=3)
for i, txt in enumerate(site_name):
    ax.annotate(txt, (site_coord[i,0]+.01,site_coord[i,1]-0.01))

plt.show()
<IPython.core.display.Javascript object>

GPS点数据集

GPX格式是标准格式。

pygpx模块用于imoprt数据(https://pypi.python.org/pypi/gpxpy

gpx_file = open( './_DATA/activity_Raquette.gpx', 'r' )

解析gpx数据,使其成为pandas数据帧

解析gpx文件

gpx = gpxpy.parse(gpx_file)

启动熊猫数据帧

df = pd.DataFrame([] , index = [], columns = ['time','seconde', 'lat', 'long','elevation','dist'])

用GPX数据填充数据帧

i=0
t0 = gpx.tracks[0].segments[0].points[0].time
for track in gpx.tracks:
    for segment in track.segments:
        for point in segment.points:
            if i==0:
                df.loc[i]=[(point.time-t0),(point.time-t0).seconds,point.latitude, point.longitude, point.elevation,0.]
            else:
                dist = df.dist[i-1] + great_circle((point.latitude,point.longitude) , (df.lat[i-1],df.long[i-1])).km*1000.
                df.loc[i]=[(point.time-t0),(point.time-t0).seconds,point.latitude, point.longitude, point.elevation,dist]
            i=i+1

显示数据帧

df.head(30)
time seconde lat long elevation dist
0 00:00:00 0 45.889134 6.451944 1761.599976 0.000000
1 00:00:01 1 45.889134 6.451944 1761.599976 0.055937
2 00:00:03 3 45.889138 6.451947 1761.199951 0.561099
3 00:00:06 6 45.889148 6.451971 1761.400024 2.746140
4 00:00:07 7 45.889151 6.451982 1761.800049 3.594090
5 00:00:08 8 45.889153 6.451989 1762.199951 4.204322
6 00:00:09 9 45.889156 6.451997 1762.599976 4.936265
7 00:00:10 10 45.889159 6.452006 1763.199951 5.693639
8 00:00:11 11 45.889161 6.452014 1763.400024 6.337976
9 00:00:17 17 45.889176 6.452063 1763.400024 10.514476
10 00:00:22 22 45.889187 6.452107 1763.400024 14.115267
11 00:00:28 28 45.889204 6.452154 1763.599976 18.182346
12 00:00:29 29 45.889206 6.452160 1764.000000 18.697474
13 00:00:30 30 45.889208 6.452169 1764.400024 19.448647
14 00:00:31 31 45.889210 6.452177 1764.800049 20.132073
15 00:00:32 32 45.889212 6.452189 1765.199951 21.071081
16 00:00:33 33 45.889212 6.452194 1765.400024 21.495445
17 00:00:36 36 45.889214 6.452223 1765.400024 23.698945
18 00:00:42 42 45.889233 6.452275 1765.400024 28.232220
19 00:00:44 44 45.889238 6.452297 1765.800049 30.061939
20 00:00:45 45 45.889239 6.452298 1766.199951 30.203330
21 00:00:46 46 45.889245 6.452316 1766.599976 31.733769
22 00:00:47 47 45.889242 6.452318 1767.000000 32.014060
23 00:00:48 48 45.889249 6.452334 1767.400024 33.435011
24 00:00:49 49 45.889252 6.452340 1767.400024 34.015566
25 00:00:55 55 45.889263 6.452388 1767.400024 37.907578
26 00:00:58 58 45.889272 6.452408 1767.800049 39.750404
27 00:00:59 59 45.889274 6.452418 1768.199951 40.580836
28 00:01:00 60 45.889276 6.452420 1768.599976 40.820970
29 00:01:01 61 45.889278 6.452433 1769.000000 41.842973

绘制轨迹

fig = plt.figure()
ax = fig.gca()
cs = M1.plot_track(ax,df)
cbar = fig.colorbar(cs)
cbar.set_label('Altitude $m$')   # On specifie le label en z

ax.plot(site_coord[:,0],site_coord[:,1],'r+',markersize=3)
for i, txt in enumerate(site_name):
    ax.annotate(txt, (site_coord[i,0]+.002,site_coord[i,1]-0.002))
<IPython.core.display.Javascript object>

绘制高程与距离

fig = plt.figure()
plt.plot(df.dist,df.elevation,'b',label = "Elevation")
plt.ylabel("Distance [$m$]")
plt.xlabel("Elevation [$m$]")
plt.grid()
plt.legend()
plt.show()
<IPython.core.display.Javascript object>

绘制距离与时间的关系图

fig = plt.figure()
plt.plot(df.seconde/60,df.dist,'b',label = "path")
plt.xlabel("temps [$min$]")
plt.ylabel("Distance [$m$]")
plt.grid()
plt.legend()
plt.show()
<IPython.core.display.Javascript object>

如何计算雪鞋打嗝时的速度?

我们知道,为了得到速度,我们只需要计算距离对时间的导数。

读者可以参考“本子的导数”来引用

vit = np.diff(df.dist)/np.diff(df.seconde)
vit = vit * 3.6 # m/s to km/h

绘制速度与时间的关系图

fig = plt.figure()
plt.plot(df.seconde[:-1]/60,vit,'+b')
plt.xlabel("temps [$min$]")
plt.ylabel("Velocity [$km/h$]")
plt.grid()
plt.show()
<IPython.core.display.Javascript object>

所得结果不可读,也不提供有价值的信息!

skip = 5
vit = np.diff(signal.savgol_filter(df.dist[::skip] , 41, 2))/np.diff(df.seconde[::skip])
vit = signal.savgol_filter(df.dist , 101, 1, deriv=1, delta = 1)
vit = vit * 3.6 # m/s to km/h
fig = plt.figure()
plt.plot(df.seconde/60,vit,'b')
plt.xlabel("temps [$min$]")
plt.ylabel("Velocity [$km/h$]")
plt.grid()
plt.show()
<IPython.core.display.Javascript object>