SQL join语法案例
Data:
order.txt
order011,u001,300
order012,u002,200
order023,u006,100
order056,u007,300
order066,u003,500
order055,u004,300
order021,u005,300
order014,u001,100
order025,u005,300
order046,u007,30
order067,u003,340
order098,u008,310
user.txt
u001,hls,22,fengjie
u002,wangwu,31,lisi
u003,zhangyanru,22,tananpengyou
u004,laocao,26,fengyi
u005,mengqi,12,nvmengqi
u006,haolei,38,sb
u007,wanghongjing,24,wife
u009,wanghongjing,24,wife
期望结果:每个订单关联其用户信息后输出一行,字段依次来自 order.txt 与 user.txt,例如:order011 u001 300 hls 22 fengjie
代码示例:
package com.doit.day03
import scala.io.{BufferedSource, Source}
/**
 * Demonstrates SQL-style joins between order records and user records read from text files.
 *
 * Order lines have the form `order011,u001,300` (order id, user id, amount); user lines have
 * the form `u001,hls,22,fengjie` (user id followed by three descriptive fields). The join
 * condition is `order.user_id = user.user_id`, and every joined row is rendered as
 * `order011 u001 300 hls 22 fengjie`.
 */
object JoinDemo {

  /** Placeholder emitted for each user column when a left join finds no matching user. */
  private val Missing = "null"

  /**
   * Reads both data files and prints the left join followed by the inner join.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Backslashes are escaped: sequences such as "\d" or "\u" are not valid in an ordinary
    // Scala string literal, so the unescaped Windows paths would not compile.
    val userLines = readLines("D:\\develop\\ideaWorkSpace\\myself\\study\\scalaDemo\\data\\user.txt")
    val orderLines = readLines("D:\\develop\\ideaWorkSpace\\myself\\study\\scalaDemo\\data\\order.txt")

    println("============left join================")
    leftJoin(userLines, orderLines).sorted.foreach(println)
    println("============inner join================")
    innerJoin(userLines, orderLines).sorted.foreach(println)
  }

  /**
   * Left join with the orders as the left side: every well-formed order yields one row, and
   * the user columns are filled with `null` when no user has the order's user id.
   *
   * @param userLines  raw lines of the user file
   * @param orderLines raw lines of the order file
   * @return joined rows in the order the orders appear in `orderLines`
   */
  def leftJoin(userLines: Seq[String], orderLines: Seq[String]): List[String] = {
    val users = parseUsers(userLines)
    parseOrders(orderLines).map { case (orderId, userId, amount) =>
      val (name, age, alias) = users.getOrElse(userId, (Missing, Missing, Missing))
      s"$orderId $userId $amount $name $age $alias"
    }
  }

  /**
   * Inner join: only orders whose user id exists in the user data yield a row.
   *
   * @param userLines  raw lines of the user file
   * @param orderLines raw lines of the order file
   * @return joined rows in the order the orders appear in `orderLines`
   */
  def innerJoin(userLines: Seq[String], orderLines: Seq[String]): List[String] = {
    val users = parseUsers(userLines)
    parseOrders(orderLines).flatMap { case (orderId, userId, amount) =>
      users.get(userId).toList.map { case (name, age, alias) =>
        s"$orderId $userId $amount $name $age $alias"
      }
    }
  }

  /** Reads every line of the file at `path`, closing the file even if reading fails. */
  private def readLines(path: String): List[String] = {
    val source = Source.fromFile(path)
    try source.getLines().toList
    finally source.close()
  }

  /** Indexes user lines by user id; lines that do not have exactly four fields are skipped. */
  private def parseUsers(lines: Seq[String]): Map[String, (String, String, String)] =
    lines
      .map(_.split(",", -1))
      .collect { case Array(id, name, age, alias) => id -> ((name, age, alias)) }
      .toMap

  /** Parses order lines into (orderId, userId, amount); lines without exactly three fields are skipped. */
  private def parseOrders(lines: Seq[String]): List[(String, String, String)] =
    lines
      .map(_.split(",", -1))
      .collect { case Array(orderId, userId, amount) => (orderId, userId, amount) }
      .toList
}
pv,uv案例
数据:
site1,user1,2018-03-01 02:12:22
site1,user2,2018-03-05 04:12:22
site1,user2,2018-03-05 04:13:22
site1,user2,2018-03-05 04:14:22
site1,user2,2018-03-05 04:15:22
site4,user7,
site1,user2,2018-03-05 05:15:22
site1,user2,2018-03-05 08:15:22
site1,user3,2018-03-05 04:15:22
site1,user4,2018-03-05 05:15:22
site1,user3,2018-03-07 11:12:22
site1,user3,2018-03-08 11:12:22
site2,user4,2018-03-07 15:12:22
site3,user5,2018-03-07 08:12:22
site3,user6,2018-03-05 08:12:22
site1,user1,2018-03-08 11:12:22
site1,,2018-03-08 11:12:22
site2,user2,2018-03-07 15:12:22
site3,user5,2018-03-07 08:12:22
site3,user5,2018-03-07 18:12:22
site3,user6,2018-03-05 08:12:22
site4,user7,2018-03-03 10:12:22
site2,,2018-03-08 11:12:22
site3,user5,2018-03-07 08:12:22
site3,user6,2018-03-05 08:12:22
site4,user5,2018-03-03 10:12:22
site4,user7,2018-02-20 11:12:22
代码:
package com.doit.day03
import scala.io.{BufferedSource, Source}
/**
 * Computes page views (PV) and unique visitors (UV) per day and per site.
 *
 *  - PV: the number of visit records.
 *  - UV: the number of distinct users among those records.
 *
 * Input lines have the form `site1,user1,2018-03-01 02:12:22` (site, user, timestamp).
 */
object PVUVDemo {

  /**
   * Reads the access log and prints the PV table followed by the UV table, each sorted by
   * its (date, site) key.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Backslashes are escaped: sequences such as "\d" or "\p" are not valid in an ordinary
    // Scala string literal, so the unescaped Windows path would not compile.
    val source: BufferedSource =
      Source.fromFile("D:\\develop\\ideaWorkSpace\\myself\\study\\scalaDemo\\data\\pvuv.txt")
    val lines: List[String] =
      try source.getLines().toList
      finally source.close()

    val events = parseEvents(lines)
    println("============pv================")
    pageViews(events).toSeq.sortBy(_._1).foreach(println)
    println("============uv================")
    uniqueVisitors(events).toSeq.sortBy(_._1).foreach(println)
  }

  /**
   * Parses raw log lines into (site, user, date) triples, dropping dirty records.
   *
   * A line is dropped when it has fewer than three comma-separated fields, when any field is
   * empty, or when the timestamp is too short to contain a `yyyy-MM-dd` date prefix.
   *
   * @param lines raw log lines
   * @return one (site, user, date) triple per valid line, in input order
   */
  def parseEvents(lines: Seq[String]): List[(String, String, String)] =
    lines
      // limit -1 keeps trailing empty fields so that "site4,user7," is seen as incomplete
      .map(_.split(",", -1))
      .collect {
        case arr if arr.length >= 3 && !arr.exists(_.isEmpty) && arr(2).length >= 10 =>
          (arr(0), arr(1), arr(2).substring(0, 10))
      }
      .toList

  /**
   * Counts visit records per (date, site).
   *
   * @param events (site, user, date) triples
   * @return number of records for each (date, site) key
   */
  def pageViews(events: Seq[(String, String, String)]): Map[(String, String), Int] =
    events
      .groupBy { case (site, _, date) => (date, site) }
      .map { case (key, visits) => key -> visits.size }

  /**
   * Counts distinct users per (date, site).
   *
   * @param events (site, user, date) triples
   * @return number of distinct users for each (date, site) key
   */
  def uniqueVisitors(events: Seq[(String, String, String)]): Map[(String, String), Int] =
    events
      .groupBy { case (site, _, date) => (date, site) }
      .map { case (key, visits) => key -> visits.map(_._2).distinct.size }
}
线段重叠案例
data:
1,4
2,5
4,6
2,4
3,6
4,6
1,5
代码:
package com.doit.day03
import scala.io.Source
/**
 * Segment-overlap demo: each input line `start,end` describes a segment covering the integer
 * points from `start` to `end` inclusive, and the program reports how many segments cover
 * each point.
 */
object LineDemo {

  /**
   * Reads the segment file and prints one `(point, count)` pair per covered point, in
   * ascending point order.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Backslashes are escaped: sequences such as "\d" or "\l" are not valid in an ordinary
    // Scala string literal, so the unescaped Windows path would not compile.
    val source = Source.fromFile("D:\\develop\\ideaWorkSpace\\myself\\study\\scalaDemo\\data\\line.txt")
    val lines: List[String] =
      try source.getLines().toList
      finally source.close()

    // Sorted so the output does not depend on the map's iteration order.
    coverage(lines).toSeq.sortBy(_._1).foreach(println)
  }

  /**
   * Counts, for every integer point, the number of segments that cover it.
   *
   * Lines with fewer than two comma-separated fields, or whose endpoints are not integers,
   * are skipped. A segment whose start is greater than its end covers no points.
   *
   * @param lines raw `start,end` lines
   * @return map from point to the number of segments covering it
   */
  def coverage(lines: Seq[String]): Map[Int, Int] = {
    // Expand every segment into the individual points it covers.
    val points: Seq[Int] = lines.flatMap { line =>
      val parts = line.split(",").map(_.trim)
      if (parts.length < 2) Nil
      else {
        val segment = for {
          start <- parseInt(parts(0))
          end   <- parseInt(parts(1))
        } yield start to end
        segment.fold(List.empty[Int])(_.toList)
      }
    }
    // Group identical points together; the group size is the overlap count.
    points.groupBy(identity).map { case (point, hits) => point -> hits.size }
  }

  /** Parses `text` as an Int, returning None when it is not a valid integer. */
  private def parseInt(text: String): Option[Int] =
    scala.util.Try(text.toInt).toOption
}
内容来源于网络如有侵权请私信删除
文章来源: 博客园
- 还没有人评论,欢迎说说您的想法!