Source Code: Inserting Machine Learning Results with IDs into a Database
Published: 2019-06-20


The full Spark (Scala) job below loads a pre-trained GBDT model from HDFS, scores every record in the validation table, and writes the IDs together with the predicted score back into Hive:

import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.GradientBoostedTrees
import org.apache.spark.mllib.tree.configuration.BoostingStrategy
import org.apache.spark.mllib.tree.model.GradientBoostedTreesModel
import org.apache.spark.sql.{Row, SaveMode}
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
import org.apache.spark.{SparkConf, SparkContext}

import scala.collection.mutable.ArrayBuffer

object v4score20180123 {
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("v4model20180123")
    val sc = new SparkContext(sparkConf)
    val hc = new HiveContext(sc)

    // Read the validation table from Hive and build one dense feature vector per row.
    val dataInstance = hc.sql("select * from lkl_card_score.fqz_score_dataset_04vals").map { row =>
      val arr = new ArrayBuffer[Double]()
      // Skip the first three columns (ID and label fields); the remaining columns are features.
      for (i <- 3 until row.size) {
        if (row.isNullAt(i)) arr += 0.0
        else if (row.get(i).isInstanceOf[Int]) arr += row.getInt(i).toDouble
        else if (row.get(i).isInstanceOf[Double]) arr += row.getDouble(i)
        else if (row.get(i).isInstanceOf[Long]) arr += row.getLong(i).toDouble
        else if (row.get(i).isInstanceOf[String]) arr += 0.0
      }
      (row(0), row(1), row(2), Vectors.dense(arr.toArray))
    }

    // Load the pre-trained GBDT model from HDFS.
    val modeltest = GradientBoostedTreesModel.load(sc, "hdfs://ns1/user/songchunlin/model/v4model20180123s")

    // Score every record, keeping (order_id, apply_time, label, score).
    val preditDataGBDT = dataInstance.map { point =>
      val prediction = modeltest.predict(point._4)
      (point._1, point._2, point._3, prediction)
    }
    preditDataGBDT.take(5)

    // Convert the RDD to a DataFrame: map each tuple to a Row, then apply an explicit schema.
    val rowRDD = preditDataGBDT.map(row => Row(row._1.toString, row._2.toString, row._3.toString, row._4))
    val schema = StructType(
      List(
        StructField("order_id", StringType, true),
        StructField("apply_time", StringType, true),
        StructField("label", StringType, true),
        StructField("score", DoubleType, true)
      )
    )
    val scoreDataFrame = hc.createDataFrame(rowRDD, schema)
    scoreDataFrame.count()

    // Persist the IDs plus predicted scores as a Hive table.
    scoreDataFrame.write.mode(SaveMode.Overwrite).saveAsTable("lkl_card_score.fqz_score_dataset_03val_v4_predict0123s")
  }
}
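As a side note on the RDD-to-DataFrame step: instead of spelling out the StructType, the same conversion could be written with a case class and toDF(). The snippet below is a minimal sketch, not part of the original job; it assumes Spark 1.x, the hc and preditDataGBDT values defined above, and that the (hypothetical) case class is declared at the top level of the file, outside main.

// Hypothetical alternative for the conversion step (assumes the job above).
case class ScoredOrder(order_id: String, apply_time: String, label: String, score: Double)

// ...inside main, after preditDataGBDT has been built:
import hc.implicits._  // enables rdd.toDF() for RDDs of case classes
val scoreDF = preditDataGBDT
  .map(r => ScoredOrder(r._1.toString, r._2.toString, r._3.toString, r._4))
  .toDF()
scoreDF.write.mode(SaveMode.Overwrite)
  .saveAsTable("lkl_card_score.fqz_score_dataset_03val_v4_predict0123s")

Once the job has finished, the written table can be read back with the same HiveContext for a quick sanity check. Again a minimal sketch, assuming the table name used above:

// Minimal sanity check (assumes the HiveContext `hc` and the table written above).
val scored = hc.sql(
  "select order_id, apply_time, label, score " +
  "from lkl_card_score.fqz_score_dataset_03val_v4_predict0123s")
scored.show(5)                               // print a few scored orders
println(s"rows written: ${scored.count()}")  // total number of scored rows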

  

Reprinted from: http://bzlhx.baihongyu.com/
