当前聚焦:python数据分析之单因素分析线性拟合及地理编码
【资料图】
目录
一、单因素分析线性拟合
二、实现地理编码

一、单因素分析线性拟合
功能:线性拟合,单因素分析;对散点图进行线性拟合,并放大散点图的局部位置。
输入:某个xlsx文件,包含"患者密度(人/10万人)"和"人口密度(人/平方千米)"两列。
输出:对这两列数据进行线性拟合,并绘制散点图。
实现代码:
import pandas as pd
from pylab import mpl
from scipy import optimize
import numpy as np
import matplotlib.pyplot as plt
def f_1(x, A, B):
    """Evaluate the straight line ``A*x + B``.

    Args:
        x: Point(s) at which to evaluate the line; anything supporting
            ``*`` and ``+`` with ``A`` and ``B`` works.
        A: Slope.
        B: Intercept.

    Returns:
        ``A*x + B``.
    """
    return A*x + B
# District/county names used to label the scatter points; entry k labels data
# row k.  NOTE(review): this assumes the workbook rows are in the same order
# as this list -- confirm against the data file.
_DISTRICT_NAMES = [
    "玄武区", "秦淮区", "建邺区", "鼓楼区", "浦口区", "栖霞区", "雨花台区", "江宁区", "六合区", "溧水区", "高淳区",
    "锡山区", "惠山区", "滨湖区", "梁溪区", "新吴区", "江阴市", "宜兴市",
    "鼓楼区", "云龙区", "贾汪区", "泉山区", "铜山区", "丰县", "沛县", "睢宁县", "新沂市", "邳州市",
    "天宁区", "钟楼区", "新北区", "武进区", "金坛区", "溧阳市",
    "虎丘区", "吴中区", "相城区", "姑苏区", "吴江区", "常熟市", "张家港市", "昆山市", "太仓市",
    "崇川区", "港闸区", "通州区", "如东县", "启东市", "如皋市", "海门市", "海安市",
    "连云区", "海州区", "赣榆区", "东海县", "灌云县", "灌南县",
    "淮安区", "淮阴区", "清江浦区", "洪泽区", "涟水县", "盱眙县", "金湖县",
    "亭湖区", "盐都区", "大丰区", "响水县", "滨海县", "阜宁县", "射阳县", "建湖县", "东台市",
    "广陵区", "邗江区", "江都区", "宝应县", "仪征市", "高邮市",
    "京口区", "润州区", "丹徒区", "丹阳市", "扬中市", "句容市",
    "海陵区", "高港区", "姜堰区", "兴化市", "靖江市", "泰兴市",
    "宿城区", "宿豫区", "沭阳县", "泗阳县", "泗洪县",
]


def _min_max_scale(values):
    """Linearly rescale *values* so the smallest maps to 0 and the largest to 1."""
    # Compute the extremes once instead of once per element.
    lowest = min(values)
    span = max(values) - lowest
    if span == 0:
        raise ValueError("cannot min-max normalise a column whose values are all equal")
    return [(value - lowest) / span for value in values]


def _draw_layers(ax, xs, ys, grid, fit_y):
    """Draw the layers both panels share: points, labels, fitted line, y=x, legend."""
    ax.scatter(xs, ys, c="r")
    ax.set_ylabel("患者密度(人/10万人)", fontsize=13)
    ax.set_xlabel("人口密度(人/平方千米)", fontsize=13)
    ax.set_title("人口密度—患者密度相关性", fontsize=13)
    # zip stops at the shorter sequence, so a sheet with fewer rows than there
    # are names no longer raises IndexError.
    for name, x, y in zip(_DISTRICT_NAMES, xs, ys):
        ax.annotate(name, (x, y))
    ax.plot(grid, fit_y, "blue", label="一次拟合直线")
    ax.plot(grid, grid, "g--", label="y=x")
    ax.legend(loc="upper left", fontsize=13)


def draw_cure(file):
    """Plot patient density against population density with a fitted line.

    Reads the Excel workbook *file*, min-max normalises the columns
    "患者密度(人/10万人)" and "人口密度(人/平方千米)", fits ``f_1`` to them with
    ``scipy.optimize.curve_fit`` and shows a two-panel figure: the left panel
    covers the whole normalised range, the right panel zooms into the window
    x in [0, 0.1], y in [0, 0.2], which is outlined in purple on the left.

    Args:
        file: Path of the workbook, passed straight to ``pandas.read_excel``.

    Raises:
        ValueError: If either column is empty or all of its values are equal.
    """
    table = pd.read_excel(file)
    patient = _min_max_scale(list(table["患者密度(人/10万人)"]))
    population = _min_max_scale(list(table["人口密度(人/平方千米)"]))

    # Fit patient density as a linear function of population density.
    slope, intercept = optimize.curve_fit(f_1, population, patient)[0]
    grid = np.arange(0, 1, 0.01)
    fit_y = slope * grid + intercept

    # Select the FangSong font for the figure text (the labels are Chinese).
    mpl.rcParams["font.sans-serif"] = ["FangSong"]
    plt.figure(figsize=(16, 8), dpi=98)
    overview = plt.subplot(121)
    zoom = plt.subplot(122)

    # Window shown enlarged in the right-hand panel.
    zx0, zx1, zy0, zy1 = 0, 0.1, 0, 0.2

    _draw_layers(overview, population, patient, grid, fit_y)
    overview.axis([0.0, 1.01, 0.0, 1.01])
    # Outline the zoom window on the overview panel.
    overview.plot([zx0, zx1, zx1, zx0, zx0], [zy0, zy0, zy1, zy1, zy0], "purple")

    _draw_layers(zoom, population, patient, grid, fit_y)
    zoom.axis([zx0, zx1, zy0, zy1])

    plt.show()
if __name__ == "__main__":
    # Raw string: the backslashes of the Windows path must be taken literally
    # rather than parsed as (invalid) escape sequences such as "\s".
    draw_cure(r"F:\医学大数据课题\论文终稿修改\scientific report\返修\市区县相关分析 _2231.xls")
# Resulting figure:
二、实现地理编码
输入:中文地址信息,例如"安徽为县天城镇都督村冲里18号"。
输出:经纬度坐标,例如 107.34799754989581 30.50483335424108
功能:根据中文地址信息获取经纬度坐标。
实现代码:
import json
from urllib.request import urlopen,quote
import xlrd
def readXLS(XLS_FILE, sheet0):
    """Open a workbook with xlrd and return one of its sheets.

    Args:
        XLS_FILE: Path of the workbook, passed to ``xlrd.open_workbook``.
        sheet0: Zero-based position of the sheet in ``Book.sheets()``.

    Returns:
        The selected xlrd sheet object.
    """
    # NOTE(review): xlrd 2.0 and later read only .xls files, while the script
    # entry point in this file passes an .xlsx path -- confirm the installed
    # xlrd version supports it.
    workbook = xlrd.open_workbook(XLS_FILE)
    return workbook.sheets()[sheet0]
def getlnglat(adress, *, ak="fdi11GHN3GYVQdzVnUPuLSScYBVxYDFK"):
    """Send *adress* to the Baidu geocoding v3 endpoint and return the parsed reply.

    Args:
        adress: Address text to geocode.
        ak: API key sent as the ``ak`` query parameter.
            NOTE(review): the default is a credential committed to source;
            prefer passing a key loaded from configuration.

    Returns:
        The JSON reply decoded by ``json.loads``.  The caller in this file
        reads ``["result"]["location"]["lng"]`` and ``["lat"]`` from it.
    """
    # Percent-encode the address so non-ASCII (e.g. Chinese) text is valid in a URL.
    encoded_address = quote(adress)
    request_url = (
        "http://api.map.baidu.com/geocoding/v3/?address="
        + encoded_address
        + "&output=" + "json"
        + "&ak=" + ak
    )
    # The context manager closes the HTTP response even if reading fails.
    with urlopen(request_url) as response:
        payload = response.read().decode()
    return json.loads(payload)
def getlatlon(sd_rs, start_row=4):
    """Geocode the address of every sheet row and append the results to text files.

    For each row from *start_row* to the end of the sheet, the address is built
    from columns 11, 12 and 9 (in that order), geocoded with ``getlnglat`` and
    appended to ``f_latlon.txt`` as
    ``<column 0>\\t<address>\\t<lng>\\t<lat>``.  When the reply lacks the
    expected keys, empty coordinates are written and the row index is appended
    to ``f_err.txt``.

    Args:
        sd_rs: Sheet object providing ``nrows`` and ``row_values(i)``.
        start_row: Index of the first row to process; earlier rows are skipped.
    """
    # NOTE(review): both output files are opened with the platform default
    # encoding; consider an explicit encoding if the files are shared.
    total_rows = sd_rs.nrows
    for i in range(start_row, total_rows):
        row = sd_rs.row_values(i)
        print(i, i / total_rows)  # progress: row index and fraction of the sheet
        # Address = columns 11 + 12 + 9 joined; "#" is replaced with "号".
        address = (row[11] + row[12] + row[9]).replace("#", "号")
        print(address)
        try:
            # One request per address: both coordinates come from the same reply.
            location = getlnglat(address)["result"]["location"]
            lng = location["lng"]
            lat = location["lat"]
        except KeyError as e:
            lng = ""
            lat = ""
            with open("f_err.txt", "a") as f_err:
                f_err.write(str(i) + "\t")
            print(e)
        print(lng, lat)
        # str() on every field so a numeric cell in column 0 cannot raise TypeError.
        fields = [str(row[0]), address, str(lng), str(lat)]
        with open("f_latlon.txt", "a") as f_latlon:
            f_latlon.write("\t".join(fields) + "\n")
if __name__ == "__main__":
    # Raw strings: the backslashes of the Windows paths must be taken literally
    # rather than parsed as (invalid) escape sequences.
    # Alternative input workbook:
    # sle_xls_file = r"F:\医学大数据课题\江苏省SLE数据库(两次随访合并).xlsx"
    sle_xls_file = r"F:\医学大数据课题\数据副本\江苏省SLE数据库(两次随访合并) - 副本.xlsx"
    # Index 1 selects the second sheet of the workbook.
    sle_data_rs = readXLS(sle_xls_file, 1)
    getlatlon(sle_data_rs)
# Results:
到此这篇关于python数据分析之单因素分析线性拟合及地理编码的文章就介绍到这了。更多相关python数据分析内容,请搜索脚本之家以前的文章或继续浏览下面的相关文章,希望大家以后多多支持脚本之家!
X 关闭
X 关闭

