如何在Java中高效解析性能关键的数据
我的文件中包含以下数据:
P1==1,P3==123d3213213345
P1==2,P2==123321512332456*
P1==3,P2==123321451232123332*,P4==9512*
P1==4,P3==312512343243234*,P4==98*,P5=453213264
我需要将其映射到以下结构,其中上面一行数据作为输入。
/**
 * Target structure from the question: the fields P1..P5 are meant to be
 * filled from a single input line such as {@code P1==2,P2==123321512332456*}.
 */
private static class ReferenceData {
// Integer-typed field; P2..P5 below are kept as raw strings.
private int P1;
private String P2;
private String P3;
private String P4;
private String P5;
/**
 * Placeholder constructor: the question asks for the fastest way to implement it.
 *
 * @param line one line of the input data
 */
public ReferenceData(String line) {
//Parse and store in the corresponding class fields
}
}
这个单一的结构对于我的应用程序的性能绝对至关重要。
将上述数据行解析并存入对应类字段的最快方法是什么(我指的是真正非常快的方法)?
我已经做过功课,并对代码做了彻底的性能分析(profiling)。瓶颈恰恰就在这里,而不是IO或其他任何地方。
哦!还有一点:P1、P2……这样的变量可能有成千上万个,这里只是一个例子。
注意:
我不能使用jni
解决方法
我写了一个手工编码的状态机解析器和一个简单的基准测试。对于基准测试,我还包括了其他建议的解析策略:split和regex(请注意,这些方法不能按发布方式工作,因此我进行了一些更正)。
一些注意事项:
我使用了原提问者提供的示例数据,但删除了多余的空格和错误的“d”字符。我认为“d”是笔误。如果需要,可以多做一些工作来支持对空格的处理。
避免内存分配,例如使用“new”运算符或调用会创建对象的方法。
避免调用外部方法——代价是出现一些重复代码。(注意:还可以进一步去掉对Integer.parseInt()的调用,从而避免外部调用并获得更高的解析性能。)
首先是结果:
Warming up...
Benchmarking...
average parse time for SplitParser: 5154.4ns
average parse time for RegexParser: 1820.8ns
average parse time for StateMachineParser: 401.3ns
这是代码:
package test;
import java.text.ParseException;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Micro-benchmark comparing three strategies for parsing lines such as
 * {@code P1==3,P2==123321451232123332*,P4==9512*} into {@link ReferenceData}:
 * {@code String.split}, a precompiled regular expression, and a hand-coded
 * state machine.
 */
public class Parser {

    /** Sample lines that every parser is exercised with. */
    private static final List<String> SAMPLE_INPUTS = Arrays.asList(
            "P1==1,P3==1233213213345",
            "P1==2,P2==123321512332456*",
            "P1==3,P2==123321451232123332*,P4==9512*",
            "P1==4,P3==312512343243234*,P4==98*,P5==453213264");

    /**
     * Runs the benchmark for all three parser implementations.
     *
     * @param args ignored
     */
    public static void main(String... args) {
        // Uncomment to print what a parser produces for each sample input.
        //test(new StateMachineParser());
        //test(new RegexParser());
        //test(new SplitParser());
        // Explicit type witness so the call does not depend on target typing.
        benchmark(Arrays.<ReferenceDataParser>asList(
                new SplitParser(), new RegexParser(), new StateMachineParser()));
    }

    /**
     * Parses every sample input with {@code parser} and prints the result to
     * stderr. On failure the input is echoed with a caret under the error offset.
     *
     * @param parser the parser to exercise
     */
    private static void test(ReferenceDataParser parser) {
        final String prefix = "Failed to parse: ";
        for (String input : SAMPLE_INPUTS) {
            try {
                System.err.println(parser.parse(input));
            } catch (ParseException pe) {
                System.err.println(prefix + input);
                // Pad by the prefix length as well, so the caret lines up with
                // the offending character of the input echoed on the line above.
                int caretColumn = prefix.length() + pe.getErrorOffset();
                StringBuilder buf = new StringBuilder();
                for (int i = 0; i < caretColumn; i++) {
                    buf.append(' ');
                }
                buf.append('^');
                System.err.println(buf.toString());
                pe.printStackTrace();
            }
        }
    }

    /**
     * Warms up each parser, then times it over all sample inputs and prints the
     * average time per parse in nanoseconds to stderr.
     *
     * @param parsers the parser implementations to measure
     */
    private static void benchmark(List<ReferenceDataParser> parsers) {
        int warmupIters = 100 * 1000;
        int iters = 1000 * 1000;

        System.err.println("Warming up...");
        for (ReferenceDataParser parser : parsers) {
            try {
                for (String input : SAMPLE_INPUTS) {
                    for (int i = 0; i < warmupIters; i++) {
                        parser.parse(input);
                    }
                }
            } catch (Exception e) {
                System.err.println("parser failed: " + parser.getClass().getSimpleName());
            }
        }

        System.err.println("Benchmarking...");
        for (ReferenceDataParser parser : parsers) {
            try {
                long start = System.nanoTime();
                for (String input : SAMPLE_INPUTS) {
                    for (int i = 0; i < iters; i++) {
                        parser.parse(input);
                    }
                }
                long elapsed = System.nanoTime() - start;
                System.err.println(String.format("average parse time for %s: %.1fns",
                        parser.getClass().getSimpleName(),
                        elapsed / (double) (iters * SAMPLE_INPUTS.size())));
            } catch (Exception e) {
                System.err.println("parser failed: " + parser.getClass().getSimpleName());
            }
        }
    }

    /** Strategy interface implemented by each parsing approach. */
    public static interface ReferenceDataParser {
        /**
         * Parses one input line.
         *
         * @param line the line to parse
         * @return the parsed fields
         * @throws ParseException if the implementation rejects the line
         */
        public ReferenceData parse(String line) throws ParseException;
    }

    /** Immutable holder for the five fields of one parsed line. */
    public static class ReferenceData {
        private final int p1;
        private final String p2;
        private final String p3;
        private final String p4;
        private final String p5;

        public ReferenceData(int p1, String p2, String p3, String p4, String p5) {
            this.p1 = p1;
            this.p2 = p2;
            this.p3 = p3;
            this.p4 = p4;
            this.p5 = p5;
        }

        @Override
        public String toString() {
            return String.format("P1=%s,P2=%s,P3=%s,P4=%s,P5=%s", p1, p2, p3, p4, p5);
        }
    }

    /** Parser built on {@code String.split}; performs no validation of the line. */
    private static class SplitParser implements ReferenceDataParser {
        @Override
        public ReferenceData parse(String line) throws ParseException {
            int p1 = 0;
            String p2 = null;
            String p3 = null;
            String p4 = null;
            String p5 = null;
            String lineSplit[] = line.split(",");
            for (int i = 0; i < lineSplit.length; i++) {
                String value = lineSplit[i].split("==")[1];
                if (lineSplit[i].startsWith("P1")) {
                    p1 = Integer.valueOf(value);
                } else if (lineSplit[i].startsWith("P2")) {
                    p2 = value;
                } else if (lineSplit[i].startsWith("P3")) {
                    p3 = value;
                } else if (lineSplit[i].startsWith("P4")) {
                    p4 = value;
                } else if (lineSplit[i].startsWith("P5")) {
                    p5 = value;
                }
            }
            // Pass all five fields; the constructor has no two-argument form.
            return new ReferenceData(p1, p2, p3, p4, p5);
        }
    }

    /**
     * Parser built on a precompiled regular expression. P1 is mandatory, P2..P5
     * are optional, and the fields must appear in ascending order.
     */
    private static class RegexParser implements ReferenceDataParser {
        private static final Pattern PATTERN = Pattern.compile(
                "(?:P1==(\\d+))(?:\\s*,P2==([0-9*]+))?(?:\\s*,P3==([0-9*]+))?"
                + "(?:\\s*,P4==([0-9*]+))?(?:\\s*,P5==([0-9*]+))?");

        @Override
        public ReferenceData parse(String line) throws ParseException {
            Matcher m = PATTERN.matcher(line);
            if (!m.matches()) {
                throw new ParseException(line, 0);
            }
            int p1 = Integer.parseInt(m.group(1));
            String p2 = m.group(2); // note: this can be null if P2 is not part of the line
            String p3 = m.group(3);
            String p4 = m.group(4);
            String p5 = m.group(5);
            return new ReferenceData(p1, p2, p3, p4, p5);
        }
    }

    /**
     * Hand-coded state machine that walks the line once, character by
     * character. The field-assignment switch is deliberately duplicated (once
     * for a value ended by a comma, once for the value at end of line) instead
     * of being extracted into a helper method.
     */
    private static class StateMachineParser implements ReferenceDataParser {
        private static final int STATE_INITIAL_P = 0;
        private static final int STATE_P = 1;
        private static final int STATE_P_NUM = 2;
        private static final int STATE_EQ1 = 3;
        private static final int STATE_EQ2 = 4;
        private static final int STATE_VALUE = 5;

        @Override
        public ReferenceData parse(String line) throws ParseException {
            int p1 = 0;
            String p2 = null;
            String p3 = null;
            String p4 = null;
            String p5 = null;
            int state = STATE_INITIAL_P;
            int length = line.length();
            int pNum = 0;
            int valueStart = 0;
            int valueEnd = 0;
            for (int i = 0; i < length; i++) {
                char c = line.charAt(i);
                switch (state) {
                    case STATE_INITIAL_P:
                    case STATE_P:
                        if (c != 'P') {
                            throw new ParseException(line, i);
                        }
                        state = STATE_P_NUM;
                        break;
                    case STATE_P_NUM:
                        if (c < '1' || c > '5') {
                            throw new ParseException(line, i);
                        }
                        pNum = c - '0';
                        state = STATE_EQ1;
                        break;
                    case STATE_EQ1:
                        if (c != '=') {
                            throw new ParseException(line, i);
                        }
                        state = STATE_EQ2;
                        break;
                    case STATE_EQ2:
                        if (c != '=') {
                            throw new ParseException(line, i);
                        }
                        // The value begins right after the second '='.
                        valueStart = valueEnd = i + 1;
                        state = STATE_VALUE;
                        break;
                    case STATE_VALUE:
                        if ((c >= '0' && c <= '9') || c == '*') {
                            valueEnd++;
                        } else if (c == ',') {
                            if (valueStart == valueEnd) {
                                throw new ParseException(line, i);
                            }
                            // A non-default value means the field was already seen.
                            switch (pNum) {
                                case 1:
                                    if (p1 != 0) {
                                        throw new ParseException(line, i);
                                    }
                                    p1 = Integer.parseInt(line.substring(valueStart, valueEnd));
                                    break;
                                case 2:
                                    if (p2 != null) {
                                        throw new ParseException(line, i);
                                    }
                                    p2 = line.substring(valueStart, valueEnd);
                                    break;
                                case 3:
                                    if (p3 != null) {
                                        throw new ParseException(line, i);
                                    }
                                    p3 = line.substring(valueStart, valueEnd);
                                    break;
                                case 4:
                                    if (p4 != null) {
                                        throw new ParseException(line, i);
                                    }
                                    p4 = line.substring(valueStart, valueEnd);
                                    break;
                                case 5:
                                    if (p5 != null) {
                                        throw new ParseException(line, i);
                                    }
                                    p5 = line.substring(valueStart, valueEnd);
                                    break;
                                default:
                                    // illegal P-number
                                    throw new ParseException(line, i);
                            }
                            state = STATE_P;
                        }
                        // NOTE(review): any other character is skipped here without
                        // advancing valueEnd, so it is neither rejected nor counted.
                        break;
                }
            }
            switch (state) {
                case STATE_INITIAL_P:
                case STATE_P:
                case STATE_P_NUM:
                case STATE_EQ1:
                case STATE_EQ2:
                    // invalid end-states
                    throw new ParseException(line, length);
                case STATE_VALUE:
                    // valid end-state; finish with last parsed value
                    if (valueStart == valueEnd) {
                        throw new ParseException(line, length);
                    }
                    switch (pNum) {
                        case 1:
                            if (p1 != 0) {
                                throw new ParseException(line, length);
                            }
                            p1 = Integer.parseInt(line.substring(valueStart, valueEnd));
                            break;
                        case 2:
                            if (p2 != null) {
                                throw new ParseException(line, length);
                            }
                            p2 = line.substring(valueStart, valueEnd);
                            break;
                        case 3:
                            if (p3 != null) {
                                throw new ParseException(line, length);
                            }
                            p3 = line.substring(valueStart, valueEnd);
                            break;
                        case 4:
                            if (p4 != null) {
                                throw new ParseException(line, length);
                            }
                            p4 = line.substring(valueStart, valueEnd);
                            break;
                        case 5:
                            if (p5 != null) {
                                throw new ParseException(line, length);
                            }
                            p5 = line.substring(valueStart, valueEnd);
                            break;
                        default:
                            // illegal P-number
                            throw new ParseException(line, length);
                    }
                    break;
                default:
                    throw new RuntimeException("unknown state: " + state);
            }
            return new ReferenceData(p1, p2, p3, p4, p5);
        }
    }
}
绝对最快的方法是编写自己的基于状态机的解析器。
使用正则表达式,并假设各个P字段总是按顺序出现:
// Compiled once and reused for every line. P1 is mandatory; P2..P5 are
// optional but must appear in ascending order, each introduced by a comma.
static final Pattern p = Pattern.compile(
        "P1==(\\d+)(?:,\\s*P2==([^,]*))?(?:,\\s*P3==([^,]*))?(?:,\\s*P4==([^,]*))?(?:,\\s*P5==([^,]*))?");

/**
 * Fills the fields from one input line using the precompiled pattern.
 * A line that does not match the pattern leaves all fields at their defaults.
 *
 * @param line one line of input, e.g. {@code P1==2,P2==123321512332456*}
 */
public ReferenceData(String line) {
    Matcher m = p.matcher(line);
    // Matcher has no match() method; matches() requires the whole line to fit.
    if (m.matches()) {
        P1 = Integer.parseInt(m.group(1));
        P2 = m.group(2); // note: this can be null if P2 is not part of the line
        P3 = m.group(3);
        P4 = m.group(4);
        P5 = m.group(5);
    }
}
我不确定这样做的速度如何,但只要文件格式不会出错、也不需要检查格式错误的行,您应该可以用简单的拆分(split)来完成所需的操作:
public ReferenceData(String line) {
String lineSplit[] = line.split(\",\");
for(int i = 0; i < lineSplit.length; i++) {
String value = lineSplit[i].split(\"==\")[1];
if(lineSplit[i].equals(\"P1\")) {
this.P1 = Integer.valueOf(value);
}
else if(lineSplit[i].equals(\"P2\")) {
this.P2 = value;
}
else if(lineSplit[i].equals(\"P3\")) {
this.P3 = value;
}
else if(lineSplit[i].equals(\"P4\")) {
this.P4 = value;
}
else if(lineSplit[i].equals(\"P5\")) {
this.P5 = value;
}
}
}
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。