
Setting up a client on your own machine to submit MR jobs to a CDH Hadoop cluster


1. First, download the Hadoop distribution that matches our CDH cluster:

hadoop-2.6.0-cdh5.14.2.tar.gz

2. Unpack Hadoop on your local machine, for example:

macOS: /opt

Windows: the D: drive

3. Copy the Hadoop configuration files from the test cluster into your local Hadoop configuration directory.

4. Set the environment variables and make them take effect.

5. If you write the job in Scala or Java, pull in the matching CDH jars first. For a Scala project, build.sbt looks like this (a small sketch that checks the bundled configuration follows after the build file):
name := "sbtawsHadoop"

version := "0.1"

scalaVersion := "2.12.6"

libraryDependencies ++= Seq(
    "org.apache.hadoop" % "hadoop-common" % "2.6.0-cdh5.14.2",
    "org.apache.hadoop" % "hadoop-hdfs" % "2.6.0-cdh5.14.2",
    "org.apache.hadoop" % "hadoop-client" % "2.6.0-cdh5.14.2",
    "org.apache.hadoop" % "hadoop-mapreduce-client-core" % "2.6.0-cdh5.14.2",

    "org.apache.hadoop" % "hadoop-mapreduce-client-common" % "2.6.0-cdh5.14.2",
    "org.apache.hadoop" % "hadoop-mapreduce-client-jobclient" % "2.6.0-cdh5.14.2",
    "org.apache.hbase" % "hbase" % "1.2.0-cdh5.14.2"

)
// Package the local conf/, data/ and public/ directories as classpath resources,
// so the cluster's *-site.xml files end up inside the assembly jar.
unmanagedResourceDirectories in Compile += baseDirectory.value / "conf"
unmanagedResourceDirectories in Compile += baseDirectory.value / "data"
unmanagedResourceDirectories in Compile += baseDirectory.value / "public"
// Note: ":=" overwrites, so of the next two lines only the last one takes effect
// (and both are redundant given the unmanagedResourceDirectories entries above).
resourceDirectory in Compile := baseDirectory.value / "data"
resourceDirectory in Compile := baseDirectory.value / "conf"

resolvers += "Sonatype OSS Snapshots" at "https://oss.sonatype.org/content/repositories/snapshots"
// The repository URL was missing in the original notes; CDH artifacts come from
// Cloudera's public repository:
resolvers += "cdh" at "https://repository.cloudera.com/artifactory/cloudera-repos/"

// The assembly* keys come from the sbt-assembly plugin (declared in project/plugins.sbt).
assemblyOutputPath in assembly := baseDirectory.value / "count-beat-80201.jar"
assemblyMergeStrategy in assembly := {
    case PathList("META-INF", xs@_*) => MergeStrategy.discard
    case x => MergeStrategy.first
}
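
The conf/ resource directory is what puts the cluster's *-site.xml files onto the client's classpath. Below is a minimal sketch (not part of the original notes) to confirm that the bundled configuration is actually picked up; it assumes conf/ holds the core-site.xml and hdfs-site.xml copied in step 3:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object ConfCheck {
  def main(args: Array[String]): Unit = {
    // new Configuration() loads core-site.xml / hdfs-site.xml from the classpath,
    // i.e. from the conf/ resources bundled by the build above.
    val conf = new Configuration()
    println("fs.defaultFS = " + conf.get("fs.defaultFS"))   // expect hdfs://cdhnode1:8020

    // If the files are not on the classpath they can be added explicitly, e.g.:
    // conf.addResource(new Path("/opt/hadoop-2.6.0-cdh5.14.2/etc/hadoop/core-site.xml"))

    val fs = FileSystem.get(conf)
    fs.listStatus(new Path("/")).foreach(s => println(s.getPath))
  }
}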



Switch to the hdfs user, make the hadoop binary available on the PATH, and check that the NameNode RPC port (8020) is listening:

su hdfs

ln -s /usr/local/hadoop-2.6.0-cdh5.14.2/bin/hadoop /usr/local/bin/hadoop
ln -s /usr/local/hadoop/bin/hadoop hadoop
sudo netstat -tulpn | grep :8020

CDH configuration file and log paths on the cluster nodes:

/run/cloudera-scm-agent/process/350-yarn-RESOURCEMANAGER/
/run/cloudera-scm-agent/process/350-yarn-RESOURCEMANAGER/yarn-site.xml

/var/log/hadoop-yarn/hadoop-cmf-yarn-RESOURCEMANAGER-cdhnode1.log.out

/opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/lib/hadoop-yarn/bin/yarn nodemanager

/opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/bin/yarn

/opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3/jars/hadoop-common-2.6.0-cdh5.14.2.jar

Submit from the local machine (macOS, using the Homebrew Hadoop binary as the launcher):

/usr/local/Cellar/hadoop/2.8.2/bin/hadoop jar ./count-beat-80201.jar ApplistCount

Point HADOOP_HOME at the CDH parcel (paths as on the cluster nodes):

export HADOOP_HOME=/opt/cloudera/parcels/CDH-5.14.2-1.cdh5.14.2.p0.3
export YARN_HOME=$HADOOP_HOME
export PATH=$PATH:$HADOOP_HOME/bin:$YARN_HOME/bin

Run jobs as the hdfs user:

su hdfs
hadoop jar /opt/hadoop-mapreduce-examples-2.8.2.jar wordcount /originData/clientlabel/output

hadoop jar ./GeoCreditPro-beat-2.0.jar ApplistCount /originData/clientlabel/AA77p2_20180525.txt /originData/clientlabel/output2

Start a ResourceManager by hand if needed:

yarn resourcemanager

Another run of the same driver, with a different output directory:

ApplistCount /originData/clientlabel/AA77p2_20180525.txt /originData/clientlabel/output4
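
The ApplistCount class itself is not included in these notes. The sketch below is only a hypothetical driver with the same calling convention as the commands above (input file and output directory as the two arguments); the mapper and reducer classes are assumed to exist in the assembly jar.

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
import org.apache.hadoop.io.{IntWritable, Text}
import org.apache.hadoop.mapreduce.Job
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat

object ApplistCount {
  def main(args: Array[String]): Unit = {
    // Picks up the bundled core/hdfs/mapred/yarn-site.xml from the classpath.
    val conf = new Configuration()
    val job = Job.getInstance(conf, "applist-count")
    job.setJarByClass(ApplistCount.getClass)
    // job.setMapperClass(classOf[ApplistMapper])    // assumption: your mapper class
    // job.setReducerClass(classOf[ApplistReducer])  // assumption: your reducer class
    job.setOutputKeyClass(classOf[Text])
    job.setOutputValueClass(classOf[IntWritable])
    FileInputFormat.addInputPath(job, new Path(args(0)))   // e.g. /originData/clientlabel/AA77p2_20180525.txt
    FileOutputFormat.setOutputPath(job, new Path(args(1))) // e.g. /originData/clientlabel/output2
    System.exit(if (job.waitForCompletion(true)) 0 else 1)
  }
}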


core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<!--Autogenerated by Cloudera Manager-->
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://cdhnode1:8020</value>
    </property>
    <property>
        <name>fs.trash.interval</name>
        <value>1</value>
    </property>
    <property>
        <name>hadoop.security.authentication</name>
        <value>simple</value>
    </property>
    <property>
        <name>hadoop.security.authorization</name>
        <value>false</value>
    </property>
    <property>
        <name>hadoop.rpc.protection</name>
        <value>authentication</value>
    </property>
    <property>
        <name>hadoop.security.auth_to_local</name>
        <value>DEFAULT</value>
    </property>
    <property>
        <name>hadoop.proxyuser.oozie.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.oozie.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.mapred.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.mapred.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.flume.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.flume.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.HTTP.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.HTTP.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hive.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hive.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hue.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hue.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.httpfs.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.httpfs.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hdfs.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hdfs.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.yarn.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.yarn.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.security.group.mapping</name>
        <value>org.apache.hadoop.security.ShellBasedUnixGroupsMapping</value>
    </property>
    <property>
        <name>hadoop.security.instrumentation.requires.admin</name>
        <value>false</value>
    </property>
    <property>
        <name>net.topology.script.file.name</name>
        <value>/etc/hadoop/conf.cloudera.yarn/topology.py</value>
    </property>
    <property>
        <name>hadoop.ssl.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>hadoop.proxyuser.llama.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.llama.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.ssl.require.client.cert</name>
        <value>false</value>
        <final>true</final>
    </property>
    <property>
        <name>hadoop.ssl.keystores.factory.class</name>
        <value>org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory</value>
        <final>true</final>
    </property>
    <property>
        <name>hadoop.ssl.server.conf</name>
        <value>ssl-server.xml</value>
        <final>true</final>
    </property>
    <property>
        <name>hadoop.ssl.client.conf</name>
        <value>ssl-client.xml</value>
        <final>true</final>
    </property>
    <property>
        <name>hadoop.http.logs.enabled</name>
        <value>true</value>
    </property>
</configuration>
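
Note that hadoop.security.authentication is simple and the proxyuser rules are wide open, so the user name presented by the client is trusted as-is. When submitting from a laptop whose local account is not hdfs, one option (an assumption here, not something the original notes do) is to wrap the HDFS and job calls in a remote-user context, mirroring the `su hdfs` used on the cluster; exporting HADOOP_USER_NAME=hdfs before launching has the same effect.

import java.security.PrivilegedExceptionAction
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.hadoop.security.UserGroupInformation

object RunAsHdfs {
  def main(args: Array[String]): Unit = {
    // With simple authentication the remote user name is taken at face value.
    val ugi = UserGroupInformation.createRemoteUser("hdfs")
    ugi.doAs(new PrivilegedExceptionAction[Unit] {
      override def run(): Unit = {
        val fs = FileSystem.get(new Configuration())
        fs.listStatus(new Path("/originData")).foreach(s => println(s.getPath))
      }
    })
  }
}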

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>

<!--Autogenerated by Cloudera Manager-->
<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:///data1/dfs/nn,file:///data2/dfs/nn</value>
    </property>
    <property>
        <name>dfs.namenode.servicerpc-address</name>
        <value>cdhnode1:8022</value>
    </property>
    <property>
        <name>dfs.https.address</name>
        <value>cdhnode1:50470</value>
    </property>
    <property>
        <name>dfs.https.port</name>
        <value>50470</value>
    </property>
    <property>
        <name>dfs.namenode.http-address</name>
        <value>cdhnode1:50070</value>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>2</value>
    </property>
    <property>
        <name>dfs.blocksize</name>
        <value>134217728</value>
    </property>
    <property>
        <name>dfs.client.use.datanode.hostname</name>
        <value>false</value>
    </property>
    <property>
        <name>fs.permissions.umask-mode</name>
        <value>022</value>
    </property>
    <property>
        <name>dfs.namenode.acls.enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.client.use.legacy.blockreader</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.client.read.shortcircuit</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.domain.socket.path</name>
        <value>/var/run/hdfs-sockets/dn</value>
    </property>
    <property>
        <name>dfs.client.read.shortcircuit.skip.checksum</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.client.domain.socket.data.traffic</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
        <value>true</value>
    </property>
</configuration>
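
dfs.client.use.datanode.hostname is false in this copy. If the laptop can resolve the cdhnode* hostnames but cannot reach the datanodes' internal IPs, a common client-side workaround (an addition here, not part of the original notes) is to flip it to true so the HDFS client talks to datanodes by hostname:

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object HostnameClient {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()
    // Client-side override; the cluster-side copy above leaves this at false.
    conf.setBoolean("dfs.client.use.datanode.hostname", true)
    val fs = FileSystem.get(conf)
    fs.listStatus(new Path("/")).foreach(s => println(s.getPath))
  }
}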

mapred-site.xml

<?xml version="1.0" encoding="UTF-8"?>

<!--Autogenerated by Cloudera Manager-->
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapred.remote.os</name>
        <value>Linux</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>cdhnode1:19888</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.https.address</name>
        <value>cdhnode1:19890</value>
    </property>

    <property>
        <name>mapreduce.app-submission.cross-platform</name>
        <value>true</value>
    </property>

    <property>
        <name>mapreduce.application.classpath</name>
        <value>
            /usr/local/hadoop/etc/hadoop,
            /usr/local/hadoop/share/hadoop/common/*,
            /usr/local/hadoop/share/hadoop/common/lib/*,
            /usr/local/hadoop/share/hadoop/hdfs/*,
            /usr/local/hadoop/share/hadoop/hdfs/lib/*,
            /usr/local/hadoop/share/hadoop/mapreduce/*,
            /usr/local/hadoop/share/hadoop/mapreduce/lib/*,
            /usr/local/hadoop/share/hadoop/yarn/*,
            /usr/local/hadoop/share/hadoop/yarn/lib/*
        </value>
    </property>
</configuration>
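
mapreduce.app-submission.cross-platform, mapred.remote.os=Linux and the explicit mapreduce.application.classpath are the settings that make submission from a Mac or Windows client work against the Linux cluster. As a sketch (the property names are standard; setting them in the driver instead of in the copied file is an assumption on top of these notes), they can also be forced in code:

import org.apache.hadoop.conf.Configuration

object ClientSubmitConf {
  // Build a Configuration suitable for submitting from a non-Linux client.
  def apply(): Configuration = {
    val conf = new Configuration()               // still loads the bundled *-site.xml first
    conf.set("mapreduce.framework.name", "yarn")
    conf.setBoolean("mapreduce.app-submission.cross-platform", true)
    conf.set("mapred.remote.os", "Linux")
    conf
  }
}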

yarn-site.xml

<?xml version="1.0" encoding="UTF-8"?>

<!--Autogenerated by Cloudera Manager-->
<configuration>
    <property>
        <name>yarn.acl.enable</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.admin.acl</name>
        <value>*</value>
    </property>
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>604800</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>cdhnode1:2181,cdhmaster:2181,cdhnode2:2181,cdhnode3:2181</value>
    </property>
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>
    <property>
        <name>yarn.client.failover-sleep-base-ms</name>
        <value>100</value>
    </property>
    <property>
        <name>yarn.client.failover-sleep-max-ms</name>
        <value>2000</value>
    </property>
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>yarnRM</value>
    </property>
    <property>
        <name>yarn.resourcemanager.work-preserving-recovery.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.id</name>
        <value>rm198</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm198</name>
        <value>cdhnode1:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm198</name>
        <value>cdhnode1:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm198</name>
        <value>cdhnode1:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm198</name>
        <value>cdhnode1:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm198</name>
        <value>cdhnode1:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.https.address.rm198</name>
        <value>cdhnode1:8090</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm214</name>
        <value>cdhnode3:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm214</name>
        <value>cdhnode3:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm214</name>
        <value>cdhnode3:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm214</name>
        <value>cdhnode3:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm214</name>
        <value>cdhnode3:8088</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.https.address.rm214</name>
        <value>cdhnode3:8090</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm198,rm214</value>
    </property>
    <property>
        <name>yarn.resourcemanager.proxy-user-privileges.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.nodes.include-path</name>
        <value>/run/cloudera-scm-agent/process/350-yarn-RESOURCEMANAGER/nodes_allow.txt</value>
    </property>
    <property>
        <name>yarn.resourcemanager.nodes.exclude-path</name>
        <value>/run/cloudera-scm-agent/process/350-yarn-RESOURCEMANAGER/nodes_exclude.txt</value>
    </property>
    <property>
        <name>yarn.resourcemanager.client.thread-count</name>
        <value>50</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.client.thread-count</name>
        <value>50</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.client.thread-count</name>
        <value>1</value>
    </property>
    <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>1024</value>
    </property>
    <property>
        <name>yarn.scheduler.increment-allocation-mb</name>
        <value>512</value>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>53931</value>
    </property>
    <property>
        <name>yarn.scheduler.minimum-allocation-vcores</name>
        <value>1</value>
    </property>
    <property>
        <name>yarn.scheduler.increment-allocation-vcores</name>
        <value>1</value>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-vcores</name>
        <value>24</value>
    </property>
    <property>
        <name>yarn.resourcemanager.amliveliness-monitor.interval-ms</name>
        <value>1000</value>
    </property>
    <property>
        <name>yarn.am.liveness-monitor.expiry-interval-ms</name>
        <value>600000</value>
    </property>
    <property>
        <name>yarn.resourcemanager.am.max-attempts</name>
        <value>2</value>
    </property>
    <property>
        <name>yarn.resourcemanager.container.liveness-monitor.interval-ms</name>
        <value>600000</value>
    </property>
    <property>
        <name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name>
        <value>1000</value>
    </property>
    <property>
        <name>yarn.nm.liveness-monitor.expiry-interval-ms</name>
        <value>600000</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
        <value>50</value>
    </property>
    <property>
        <name>yarn.application.classpath</name>
        <value>$HADOOP_CLIENT_CONF_DIR,$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.allow-undeclared-pools</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.user-as-default-queue</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.preemption</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.preemption.cluster-utilization-threshold</name>
        <value>0.8</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.sizebasedweight</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.assignmultiple</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.continuous-scheduling-enabled</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.locality-delay-node-ms</name>
        <value>2000</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.locality-delay-rack-ms</name>
        <value>4000</value>
    </property>
    <property>
        <name>yarn.scheduler.fair.continuous-scheduling-sleep-ms</name>
        <value>5</value>
    </property>
    <property>
        <name>yarn.resourcemanager.max-completed-applications</name>
        <value>10000</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-timeout-ms</name>
        <value>60000</value>
    </property>


    <!-- This second yarn.application.classpath entry is read after the Cloudera
         Manager-generated one above; the later definition wins on the client side. -->
    <property>
        <name>yarn.application.classpath</name>
        <value>
        /usr/local/hadoop/etc/hadoop,
        /usr/local/hadoop/share/hadoop/common/*,
        /usr/local/hadoop/share/hadoop/common/lib/*,
        /usr/local/hadoop/share/hadoop/hdfs/*,
        /usr/local/hadoop/share/hadoop/hdfs/lib/*,
        /usr/local/hadoop/share/hadoop/mapreduce/*,
        /usr/local/hadoop/share/hadoop/mapreduce/lib/*,
        /usr/local/hadoop/share/hadoop/yarn/*,
        /usr/local/hadoop/share/hadoop/yarn/lib/*
    </value>
    </property>

</configuration>
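
With ResourceManager HA enabled, the client only needs yarn.resourcemanager.ha.rm-ids plus the per-id addresses above; YarnClient resolves the active RM (rm198 on cdhnode1 or rm214 on cdhnode3) and fails over by itself. A quick connectivity check, as a sketch that is not part of the original notes:

import org.apache.hadoop.yarn.client.api.YarnClient
import org.apache.hadoop.yarn.conf.YarnConfiguration
import scala.collection.JavaConverters._

object YarnCheck {
  def main(args: Array[String]): Unit = {
    val conf = new YarnConfiguration()           // reads yarn-site.xml from the classpath
    println("rm-ids = " + conf.get("yarn.resourcemanager.ha.rm-ids"))
    val yarn = YarnClient.createYarnClient()
    yarn.init(conf)
    yarn.start()
    // Listing the NodeManagers proves the client reached the active ResourceManager.
    yarn.getNodeReports().asScala.foreach(n => println(n.getNodeId + " " + n.getNodeState))
    yarn.stop()
  }
}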