













#-------------------------- root ------------------------------
vi /etc/hosts
127.0.0.1 localhost.localdomain localhost
192.168.123.201 namenode hbase1
192.168.123.111 data01 hbase2
192.168.123.121 data02 hbase3
192.168.123.131 data03 hbase4
#java 설치
#java se 다운로드
tar xzvf jdk~~~~
mv jdk~~~~ /usr/local/java
#---------------------------- 생략해도 무방 -------------------------------
# /etc/ssh/sshd_config 수정
...
PubkeyAuthentication yes
AuthorizedKeysFile .ssh/authorized_keys
...
PermitEmptyPasswords no   # 주의: 'yes'는 보안 위험. 공개키 인증에는 빈 암호 허용이 필요 없음
#restart
/etc/init.d/sshd restart
#----------------------------------------------------------------------------
#방화벽 disable
#iptable stop
#-------------------------- huser ------------------------------
#ssh 설정 (각 노드에서 모두 실행)
ssh-keygen -t rsa [enter] [enter] [enter]
ssh-copy-id -i ~/.ssh/id_rsa.pub huser@namenode [yes] [pwd]
ssh-copy-id -i ~/.ssh/id_rsa.pub huser@data01 [yes] [pwd]
ssh-copy-id -i ~/.ssh/id_rsa.pub huser@data02 [yes] [pwd]
ssh-copy-id -i ~/.ssh/id_rsa.pub huser@data03 [yes] [pwd]
ssh-copy-id -i ~/.ssh/id_rsa.pub huser@hbase1 [yes]
ssh-copy-id -i ~/.ssh/id_rsa.pub huser@hbase2 [yes]
ssh-copy-id -i ~/.ssh/id_rsa.pub huser@hbase3 [yes]
ssh-copy-id -i ~/.ssh/id_rsa.pub huser@hbase4 [yes]
#hadoop, hbase, zookeeper 다운로드
/home/huser/에 각각 tar 풀기.
mv /home/huser/hadoop~~~ /home/huser/hadoop
mv /home/huser/zookeeper~~~ /home/huser/zookeeper
mv /home/huser/hbase~~~ /home/huser/hbase
#계정환경설정
vi .bash_profile
export JAVA_HOME=/usr/local/java
export HADOOP_HOME=/home/huser/hadoop
export ZOOKEEPER_HOME=/home/huser/zookeeper
export HBASE_HOME=/home/huser/hbase
export CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/jre/lib/ext
export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$ZOOKEEPER_HOME/bin:$HBASE_HOME/bin:$PATH
export HADOOP_HOME_WARN_SUPPRESS=" "
-------------------- hadoop 설치 -------------------------------------------------------
#디렉토리 생성
#namenode
mkdir -p /home/huser/work/mapred/system
mkdir -p /home/huser/work/name
chmod -R 755 /home/huser/work
#datanode
mkdir -p /home/huser/work/mapred/system
mkdir -p /home/huser/work/data
chmod -R 755 /home/huser/work
#hadoop 환경 설정
vi /home/huser/hadoop/conf/hadoop-env.sh
export JAVA_HOME=/usr/local/java
vi /home/huser/hadoop/conf/core-site.xml
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://namenode:9000</value>
</property>
</configuration>
vi /home/huser/hadoop/conf/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.name.dir</name>
<value>/home/huser/work/name</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>/home/huser/work/data</value>
</property>
<property>
<name>dfs.support.append</name>
<value>true</value>
</property>
</configuration>
vi /home/huser/hadoop/conf/mapred-site.xml
<configuration>
<property>
<name>mapred.map.child.java.opts</name>
<value>-Xmx200m </value>
</property>
<property>
<name>mapred.reduce.child.java.opts</name>
<value>-Xmx200m </value>
</property>
<property>
<name>mapred.job.tracker</name>
<value>hdfs://namenode:9001</value>
</property>
<property>
<name>mapred.system.dir</name>
<value>/home/huser/work/mapred/system</value>
</property>
</configuration>
vi /home/huser/hadoop/conf/masters
namenode
vi /home/huser/hadoop/conf/slaves
data01
data02
data03
#hadoop 띄우기
#hadoop이 관리하는 정보를 초기화
hadoop namenode -format
start-all.sh
#확인 (서버에서 확인)
# 하둡관리자
http://192.168.123.201:50070
# 맵리듀스 관리
http://192.168.123.201:50030
-------------------- Zookeeper 설치 -------------------------------------------------------
# 설정 파일 복사
cd /home/huser/zookeeper/conf
cp zoo_sample.cfg zoo.cfg
#설정 파일 수정
dataDir=/home/huser/zookeeper/data
server.1=hbase1:2888:3888
server.2=hbase2:2888:3888
server.3=hbase3:2888:3888
server.4=hbase4:2888:3888
#data dir 생성
mkdir /home/huser/zookeeper/data
vi /home/huser/zookeeper/data/myid
각각의 서버마다 1 ~ 4 입력
#시동(각각의 서버에서 실행)
zkServer.sh start
#실행 확인 (QuorumPeerMain 이 떠있으면 정상)
jps
-------------------- hbase 설치 -------------------------------------------------------
# 설정파일 수정 (주석 풀고 값 셋팅)
vi /home/huser/hbase/conf/hbase-env.sh
export JAVA_HOME=/usr/local/java/
export HBASE_CLASSPATH=/home/huser/hadoop/conf
export HBASE_MANAGES_ZK=false
vi /home/huser/hbase/conf/hbase-site.xml
<configuration>
<!-- hadoop 위치 설정 -->
<property>
<name>hbase.rootdir</name>
<value>hdfs://namenode:9000/hbase</value>
</property>
<!-- hadoop 포트 설정 -->
<property>
<name>hbase.master</name>
<value>namenode:60000</value>
</property>
<!-- 분산으로 설정이라 알려줌 -->
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<!-- 몇개가 분산으로 될꺼에요 라고 알려줌 -->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!-- zookeeper 설정 -->
<property>
<name>hbase.zookeeper.quorum</name>
<value>hbase1,hbase2,hbase3,hbase4</value>
</property>
<!-- 포트 설정 1 -->
<property>
<name>hbase.zookeeper.peerport</name>
<value>2888</value>
</property>
<!-- 포트 설정 2 -->
<property>
<name>hbase.zookeeper.leaderport</name>
<value>3888</value>
</property>
</configuration>
vi /home/huser/hbase/conf/regionservers
hbase1
hbase2
hbase3
hbase4
# 시작
/home/huser/hbase/bin/start-hbase.sh
# 실행 확인
# hbase1(master)에는 HMaster, HRegionServer
# hbase2~4(regionServer)에는 HRegionServer
jps
# 하둡관리자 화면에서 hbase를 확인 가능
http://192.168.123.201:50070
----------------------------------------------------------------------------
#접속
[huser@data03 ~]$ hbase shell
HBase Shell; enter 'help<RETURN>' for list of supported commands.
Type "exit<RETURN>" to leave the HBase Shell
Version 0.94.1, r1365210, Tue Jul 24 18:40:10 UTC 2012
hbase(main):001:0>
hbase(main):002:0*
hbase(main):003:0*
hbase(main):036:0*
hbase(main):037:0* create 'test', 'data'
0 row(s) in 2.1480 seconds
hbase(main):038:0> put 'test', 'row1', 'data:1', 'value1'
0 row(s) in 0.1850 seconds
hbase(main):039:0> put 'test', 'row1', 'data:2', 'value2'
0 row(s) in 0.0190 seconds
hbase(main):040:0> scan 'test'
ROW COLUMN+CELL
row1 column=data:1, timestamp=1352866081968, value=value1
row1 column=data:2, timestamp=1352866087508, value=value2
1 row(s) in 0.1010 seconds
hbase(main):041:0> get 'test', 'row1'
COLUMN CELL
data:1 timestamp=1352866081968, value=value1
data:2 timestamp=1352866087508, value=value2
2 row(s) in 0.0540 seconds
hbase(main):042:0>
hbase(main):043:0*
hbase(main):044:0*
hbase(main):045:0* create 'Pokemon', {NAME=>'POKE_ID'}, {NAME=>'HEIGHT'}, {NAME=>'NAME'}
0 row(s) in 1.1990 seconds
hbase(main):046:0> put 'Pokemon', '10001', 'POKE_ID', '10001', 20090929
0 row(s) in 0.0440 seconds
hbase(main):047:0> put 'Pokemon', '10001', 'HEIGHT', '30', 20090929
0 row(s) in 0.0210 seconds
hbase(main):048:0> put 'Pokemon', '10001', 'NAME', 'PIKACHU', 20090929
0 row(s) in 0.0210 seconds
hbase(main):049:0> get 'Pokemon', '10001'
COLUMN CELL
HEIGHT: timestamp=20090929, value=30
NAME: timestamp=20090929, value=PIKACHU
POKE_ID: timestamp=20090929, value=10001
3 row(s) in 0.0210 seconds
hbase(main):050:0> scan 'Pokemon'
ROW COLUMN+CELL
10001 column=HEIGHT:, timestamp=20090929, value=30
10001 column=NAME:, timestamp=20090929, value=PIKACHU
10001 column=POKE_ID:, timestamp=20090929, value=10001
1 row(s) in 0.0450 seconds
hbase(main):051:0>
hbase(main):052:0*
hbase(main):083:0> disable 'test'
0 row(s) in 2.2800 seconds
hbase(main):084:0> drop 'test'
0 row(s) in 1.5530 seconds
hbase(main):085:0>
# 하둡관리자 화면에서 위의 테이블 확인 가능
http://192.168.123.201:50070
# Put Method
void put(Put put) throws IOException
Put(byte[] row)
Put(byte[] row, RowLock rowLock)
Put(byte[] row, long ts)
Put(byte[] row, long ts, RowLock rowLock)
# Method Description
getRow() - Returns the row key as specified when creating the Put instance.
getRowLock() - Returns the row RowLock instance for the current Put instance.
getLockId() - Returns the optional lock ID handed into the constructor using the rowLock parameter. Will be -1L if not set.
setWriteToWAL() - Allows you to disable the default functionality of writing the data to the server-side write-ahead log.
getWriteToWAL() - Indicates if the data will be written to the write-ahead log.
getTimeStamp() - Retrieves the associated timestamp of the Put instance. Can be optionally set using the constructor's ts parameter. If not set, may return Long.MAX_VALUE.
heapSize() - Computes the heap space required for the current Put instance. This includes all contained data and space needed for internal structures.
isEmpty() - Checks if the family map contains any KeyValue instances.
numFamilies() - Convenience method to retrieve the size of the family map, containing all KeyValue instances.
size() - Returns the number of KeyValue instances that will be added with this Put.
# Get Method
Result get(Get get) throws IOException
Get(byte[] row)
Get(byte[] row, RowLock rowLock)
# Method Description
getRow() - Returns the row key as specified when creating the Get instance.
getRowLock() - Returns the row RowLock instance for the current Get instance.
getLockId() - Returns the optional lock ID handed into the constructor using the rowLock parameter. Will be -1L if not set.
getTimeRange()/setTimeStamp() - Retrieves the associated timestamp or time range of the Get instance. Note that there is no getTimeStamp() since the API converts a value assigned with setTimeStamp() into a TimeRange instance internally, setting the minimum and maximum values to the given timestamp.
setFilter()/getFilter() - Special filter instances can be used to select certain columns or cells, based on a wide variety of conditions. You can get and set them with these methods.
setCacheBlocks()/getCacheBlocks() - Each HBase region server has a block cache that efficiently retains recently accessed data for subsequent reads of contiguous information. In some events it is better to not engage the cache to avoid too much churn when doing completely random gets. These methods give you control over this feature.
numFamilies() - Convenience method to retrieve the size of the family map, containing the families added using the addFamily() or addColumn() calls.
hasFamilies() - Another helper to check if a family or column has been added to the current instance of the Get class.
familySet()/getFamilyMap() - These methods give you access to the column families and specific columns, as added by the addFamily() and/or addColumn() calls. The family map is a map where the key is the family name and the value a list of added column qualifiers for this particular family. The familySet() returns a Set containing just the family names.
# Delete Method
void delete(Delete delete) throws IOException
Delete(byte[] row)
Delete(byte[] row, long timestamp, RowLock rowLock)
Delete deleteFamily(byte[] family)
Delete deleteFamily(byte[] family, long timestamp)
Delete deleteColumns(byte[] family, byte[] qualifier)
Delete deleteColumns(byte[] family, byte[] qualifier, long timestamp)
Delete deleteColumn(byte[] family, byte[] qualifier)
Delete deleteColumn(byte[] family, byte[] qualifier, long timestamp)
void setTimestamp(long timestamp)
출처 : Gruter Technical Note
: http://www.bicdata.com/
: http://wiki.apache.org/hadoop/Hbase
: Ian Varley의 HBase Schema Design
: 한글 출처 http://www.cyworld.com/duetys/14411321