diff --git a/emp.txt b/emp.txt
new file mode 100644
index 0000000..798094b
--- /dev/null
+++ b/emp.txt
@@ -0,0 +1,14 @@
+7369,SMITH,CLERK,7902,2021-12-17,800,0,20,160,4,2024-06-01,"Java基础,SQL优化"
+7499,ALLEN,SALESMAN,7698,2021-02-20,1600,300,30,180,5,2024-09-15,"销售技巧,客户关系管理"
+7521,WARD,SALESMAN,7698,2021-02-22,1250,500,30,170,3,2025-03-10,"谈判技巧,市场分析"
+7566,JONES,MANAGER,7839,2021-04-02,2975,0,20,150,4,2024-07-01,"领导力培训,项目管理"
+7654,MARTIN,SALESMAN,7698,2021-09-28,1250,1400,30,190,5,2024-10-01,"销售策略,数据分析"
+7698,BLAKE,MANAGER,7839,2021-05-01,2850,0,30,140,4,2024-08-15,"团队建设,沟通技巧"
+7782,CLARK,MANAGER,7839,2021-06-09,2450,0,10,130,5,2024-05-01,"人力资源管理,绩效考核"
+7788,SCOTT,ANALYST,7566,2023-12-09,3000,0,20,165,4,2024-11-01,"数据分析,数据库优化"
+7839,KING,PRESIDENT,null,2021-11-17,5000,0,10,120,5,2024-04-01,"战略规划,企业治理"
+7844,TURNER,SALESMAN,7698,2021-09-08,1500,0,30,175,3,2024-02-15,"销售技巧,客户服务"
+7876,ADAMS,CLERK,7788,2024-01-12,1100,0,20,155,4,2025-01-01,"办公软件,文档处理"
+7900,JAMES,CLERK,7698,2021-12-03,950,0,30,145,3,2025-03-01,"客户服务,订单处理"
+7902,FORD,ANALYST,7566,2021-12-03,3000,0,20,170,5,2024-06-01,"数据分析,报告撰写"
+7934,MILLER,CLERK,7782,2023-01-23,1300,0,10,150,4,2024-07-01,"财务报表,预算管理"
\ No newline at end of file
diff --git a/emp表的字段名称.txt b/emp表的字段名称.txt
new file mode 100644
index 0000000..d1bbd4c
--- /dev/null
+++ b/emp表的字段名称.txt
@@ -0,0 +1,2 @@
+ԱID,,ְλ,˾ID,ְʱ,,,IDԱÿ¹ʱЧ֣1-5֣һνڣμӵѵγб
+empno,ename,job,mgr,hiredate,sal,credit,deptno,work_hours,performance_rating,promotion_date,training_courses
\ No newline at end of file
diff --git a/src/main/EmpHBaseClient.java b/src/main/EmpHBaseClient.java
new file mode 100644
index 0000000..e8afd9a
--- /dev/null
+++ b/src/main/EmpHBaseClient.java
@@ -0,0 +1,255 @@
+// Name of the HBase table that stores the employee records.
+private static final TableName EMP_TABLE = TableName.valueOf("emp1520");
+
+// Number of columns in one emp.txt record (empno through training_courses).
+private static final int EMP_FIELD_COUNT = 12;
+
+/**
+ * Creates the employee table with its five column families.
+ * Prints a message and leaves the cluster untouched when the table already exists.
+ * NOTE(review): {@code conf} is not declared in this file; it is assumed to be a field
+ * of the enclosing class.
+ *
+ * @throws IOException if an HBase admin call fails
+ */
+public void createEmpTable() throws IOException {
+    // try-with-resources closes the admin handle on every path, including the early return.
+    try (HBaseAdmin admin = new HBaseAdmin(conf)) {
+        if (admin.tableExists(EMP_TABLE)) {
+            System.out.println("Table already exists");
+            return;
+        }
+        HTableDescriptor tableDesc = new HTableDescriptor(EMP_TABLE);
+        tableDesc.addFamily(new HColumnDescriptor("empnum"));      // employee ID
+        tableDesc.addFamily(new HColumnDescriptor("info"));        // basic information
+        tableDesc.addFamily(new HColumnDescriptor("salary"));      // salary
+        tableDesc.addFamily(new HColumnDescriptor("performance")); // performance data
+        tableDesc.addFamily(new HColumnDescriptor("training"));    // training courses
+        admin.createTable(tableDesc);
+    }
+}
+
+/**
+ * Loads every record of {@code emp.txt} into the employee table, one Put per line.
+ * Column order in the file: empno, ename, job, mgr, hiredate, sal, credit, deptno,
+ * work_hours, performance_rating, promotion_date, training_courses.
+ * NOTE(review): {@code connection} is not declared in this file; it is assumed to be a
+ * field of the enclosing class.
+ *
+ * @throws IOException if the file cannot be read, a line has too few columns,
+ *                     or an HBase call fails
+ */
+public void loadData() throws IOException {
+    // emp.txt contains Chinese course names, so decode it explicitly as UTF-8
+    // instead of relying on the platform default charset.
+    try (Table table = connection.getTable(EMP_TABLE);
+         BufferedReader br = java.nio.file.Files.newBufferedReader(
+                 java.nio.file.Paths.get("emp.txt"),
+                 java.nio.charset.StandardCharsets.UTF_8)) {
+        String line;
+        int lineNo = 0;
+        while ((line = br.readLine()) != null) {
+            lineNo++;
+            if (line.trim().isEmpty()) {
+                continue; // tolerate blank lines
+            }
+            List<String> fields = splitCsvLine(line);
+            if (fields.size() < EMP_FIELD_COUNT) {
+                throw new IOException("emp.txt line " + lineNo + ": expected "
+                        + EMP_FIELD_COUNT + " columns but found " + fields.size());
+            }
+            String empno = fields.get(0);
+            // Row key is a 4-character MD5 prefix followed by empno.
+            Put put = new Put(Bytes.toBytes(generateRowKey(empno)));
+            // empnum family
+            addCell(put, "empnum", "empno", empno);
+            // info family
+            addCell(put, "info", "ename", fields.get(1));
+            addCell(put, "info", "job", fields.get(2));
+            addCell(put, "info", "mgr", fields.get(3));
+            addCell(put, "info", "hiredate", fields.get(4));
+            addCell(put, "info", "credit", fields.get(6));
+            addCell(put, "info", "deptno", fields.get(7));
+            // salary family
+            addCell(put, "salary", "sal", fields.get(5));
+            // performance family
+            addCell(put, "performance", "work_hours", fields.get(8));
+            addCell(put, "performance", "performance_rating", fields.get(9));
+            addCell(put, "performance", "promotion_date", fields.get(10));
+            // training family
+            addCell(put, "training", "training_courses", fields.get(11));
+            table.put(put);
+        }
+    }
+}
+
+// Adds one string-valued cell to the given Put.
+private static void addCell(Put put, String family, String qualifier, String value) {
+    put.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier), Bytes.toBytes(value));
+}
+
+// Splits one CSV line on commas, keeping commas inside double-quoted fields
+// (e.g. the training_courses column) and dropping the surrounding quotes.
+// A doubled quote inside a quoted field yields one literal quote character.
+private static List<String> splitCsvLine(String line) {
+    List<String> fields = new ArrayList<>();
+    StringBuilder current = new StringBuilder();
+    boolean inQuotes = false;
+    for (int i = 0; i < line.length(); i++) {
+        char c = line.charAt(i);
+        if (c == '"') {
+            if (inQuotes && i + 1 < line.length() && line.charAt(i + 1) == '"') {
+                current.append('"');
+                i++;
+            } else {
+                inQuotes = !inQuotes;
+            }
+        } else if (c == ',' && !inQuotes) {
+            fields.add(current.toString());
+            current.setLength(0);
+        } else {
+            current.append(c);
+        }
+    }
+    fields.add(current.toString());
+    return fields;
+}
+
+/**
+ * Builds the row key for an employee: the first four hex characters of the MD5 of
+ * the leading (up to three) characters of {@code empno}, followed by {@code empno}.
+ *
+ * @param empno employee number as read from emp.txt
+ * @return the row key string
+ */
+private String generateRowKey(String empno) {
+    // Clamp the prefix length so an empno shorter than three characters does not throw.
+    String prefix = empno.substring(0, Math.min(3, empno.length()));
+    return md5Hash(prefix).substring(0, 4) + empno;
+}
+
+/**
+ * Returns the MD5 digest of {@code input} as a 32-character lowercase hex string.
+ *
+ * @param input text to hash; encoded as UTF-8 before hashing
+ * @return zero-padded hexadecimal digest
+ * @throws RuntimeException if the JVM has no MD5 implementation
+ */
+private static String md5Hash(String input) {
+    try {
+        MessageDigest md = MessageDigest.getInstance("MD5");
+        byte[] digest = md.digest(input.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+        // %032x keeps leading zeros, which BigInteger.toString(16) would drop.
+        return String.format("%032x", new BigInteger(1, digest));
+    } catch (NoSuchAlgorithmException e) {
+        throw new RuntimeException(e);
+    }
+}
+
+/**
+ * Scans for employees whose ID is greater than 7500.
+ * The comparison is byte-wise on the stored string, so it matches numeric order only
+ * while all IDs have the same number of digits (true for the four-digit IDs in emp.txt).
+ *
+ * @throws IOException if the scan fails
+ */
+public void queryEmpNoOver7500() throws IOException {
+    // The byte[] constructor compares the whole value; SubstringComparator only
+    // answers "contains / does not contain" and cannot express an ordering.
+    SingleColumnValueFilter filter = new SingleColumnValueFilter(
+            Bytes.toBytes("empnum"),
+            Bytes.toBytes("empno"),
+            CompareFilter.CompareOp.GREATER,
+            Bytes.toBytes("7500"));
+    filter.setFilterIfMissing(true); // rows without the column must not pass
+    Scan scan = new Scan();
+    scan.setFilter(filter);
+    try (Table table = connection.getTable(EMP_TABLE);
+         ResultScanner scanner = table.getScanner(scan)) {
+        for (Result result : scanner) {
+            // TODO: print or otherwise handle each matching row
+        }
+    }
+}
+
+/**
+ * Scans for employees with a performance rating above 4 who were hired before 2022.
+ * Both comparisons are byte-wise on the stored strings: ratings are single digits and
+ * hire dates use the yyyy-MM-dd form, so string order matches the intended order.
+ *
+ * @throws IOException if the scan fails
+ */
+public void queryPerformanceAndHiredate() throws IOException {
+    SingleColumnValueFilter ratingFilter = new SingleColumnValueFilter(
+            Bytes.toBytes("performance"),
+            Bytes.toBytes("performance_rating"),
+            CompareFilter.CompareOp.GREATER,
+            Bytes.toBytes("4"));
+    ratingFilter.setFilterIfMissing(true);
+    SingleColumnValueFilter hiredateFilter = new SingleColumnValueFilter(
+            Bytes.toBytes("info"),
+            Bytes.toBytes("hiredate"),
+            CompareFilter.CompareOp.LESS,
+            Bytes.toBytes("2022-01-01"));
+    hiredateFilter.setFilterIfMissing(true);
+    FilterList list = new FilterList(FilterList.Operator.MUST_PASS_ALL);
+    list.addFilter(ratingFilter);
+    list.addFilter(hiredateFilter);
+    Scan scan = new Scan();
+    scan.setFilter(list);
+    try (Table table = connection.getTable(EMP_TABLE);
+         ResultScanner scanner = table.getScanner(scan)) {
+        for (Result result : scanner) {
+            // TODO: print or otherwise handle each matching row
+        }
+    }
+}
+
+/**
+ * Scans all employees and sorts them by promotion date, most recent first.
+ *
+ * @throws IOException if the scan fails
+ */
+public void findLatestPromotion() throws IOException {
+    List<PromotionRecord> records = new ArrayList<>();
+    try (Table table = connection.getTable(EMP_TABLE);
+         ResultScanner scanner = table.getScanner(new Scan())) {
+        for (Result result : scanner) {
+            String promotionDateStr = Bytes.toString(
+                    result.getValue(Bytes.toBytes("performance"), Bytes.toBytes("promotion_date")));
+            if (promotionDateStr != null) {
+                records.add(new PromotionRecord(
+                        Bytes.toString(result.getRow()),
+                        LocalDate.parse(promotionDateStr)));
+            }
+        }
+    }
+    records.sort(Comparator.comparing(PromotionRecord::getDate).reversed());
+    // TODO: output the most recent record(s); records.get(0) is the latest when non-empty
+}
+
+/**
+ * Counts how many employees took each training course.
+ *
+ * @throws IOException if the scan fails
+ */
+public void countTrainingCourses() throws IOException {
+    Map<String, Integer> courseMap = new HashMap<>();
+    try (Table table = connection.getTable(EMP_TABLE);
+         ResultScanner scanner = table.getScanner(new Scan())) {
+        for (Result result : scanner) {
+            String coursesStr = Bytes.toString(
+                    result.getValue(Bytes.toBytes("training"), Bytes.toBytes("training_courses")));
+            if (coursesStr == null) {
+                continue;
+            }
+            for (String course : coursesStr.split(",")) {
+                String name = course.trim();
+                if (!name.isEmpty()) {
+                    courseMap.merge(name, 1, Integer::sum);
+                }
+            }
+        }
+    }
+    // TODO: output the per-course counts
+}