% Conference paper (FutureTech 2014) published in the Lecture Notes in
% Electrical Engineering series. Citation key kept as exported so existing
% \cite commands continue to resolve.
% NOTE(review): `address` holds a country, not the publisher's city, and the
% series `volume` number is absent -- confirm both against the publisher record.
@inproceedings{0532b7a8f2034111880526f9110817f1,
  author    = {Zhang, Di and Huang, Heyan and Huang, Yonggang},
  title     = {Patent Literatures Translation System Based on {Hadoop}},
  booktitle = {Future Information Technology},
  series    = {Lecture Notes in Electrical Engineering},
  publisher = {Springer Verlag},
  address   = {Germany},
  year      = {2014},
  pages     = {127--135},
  doi       = {10.1007/978-3-642-55038-6_20},
  isbn      = {9783642550379},
  language  = {English},
  keywords  = {HBase, HDFS, Hadoop, MapReduce, machine translation, patent literatures},
  abstract  = {In order to tackle the slow response caused by massive patent literatures, a patent literatures translation system based on Hadoop is proposed in this paper. The paper presents a hybrid storage structure and a parallel translation model for massive patent literatures. The hierarchical storage structure is based on HDFS (Hadoop Distributed File System), which stores the patent documents and HBase where directories of such data are stored. This hybrid structure enables faster retrieval through the distributed file system. In translation, The Hadoop MapReduce framework is utilized. The MapReduce computation model not only can translate the patent literatures in highly parallel, but also can process multiple documents simultaneously. The experimental results show that the proposed machine translation system in this paper has better translation performance than the conventional machine translation approach.},
  note      = {9th FTRA International Conference on Future Information Technology, FutureTech 2014 ; Conference date: 28-05-2014 Through 31-05-2014},
}