import de.intags.abspieler.*;

// Create a browser and open a named window in it.
Browser browser = new Browser();
Window window = browser.newWindow("MyBrowserWindow");
// Create a document for the demo page inside that window.
Document document =
    window.newDocument("http://www.intags.de/abspieler/demo/demo1.html");
// Retrieve the document, then print the content of its response.
document.retrieve();
System.out.println(document.getResponse().getContent());
Example:
package YourOwnPackage;
import de.intags.abspieler.*;
/**
 * Minimal Abspieler example: creates a document for the demo1 page in a browser
 * window, retrieves it and prints the content of its response to standard output.
 */
public class Demo1 {

    /** Address of the demo page requested by this example. */
    private static final String DEMO_URL = "http://www.intags.de/abspieler/demo/demo1.html";

    /**
     * Runs the example.
     *
     * @param args command-line arguments (not used)
     * @throws AbspielerException if one of the Abspieler calls fails
     */
    public static void main(String[] args) throws AbspielerException {
        Window mainWindow = new Browser().newWindow("MyBrowserWindow");
        Document page = mainWindow.newDocument(DEMO_URL);

        page.retrieve();

        // Print the complete response content without any further analysis.
        System.out.println(page.getResponse().getContent());
    }
}
For example: if you're looking for stock quotes, you might expect them to be on a page
with a title containing the word "quote", and each stock quote to be in a certain format, maybe a table row with the ISIN on the left
and the price on the right, as on this page: http://www.intags.de/abspieler/demo/demo2.html.
So, you are looking for HTML code similar to:
<tr> <td>DE12345678</td> <td>Expensive Inc.</td> <td>538.34</td> </tr>
Pages that do not match those expectations are not interesting to you. Normally, you don't want to analyze them.
But if a page matches,
you can retrieve the data very easily using the methods of Abspieler.
If you also want to analyze pages that don't match your expectations, you can do that by
simply analyzing the complete response content (see above).
// Create a document for the demo page that contains the stock-quote table.
Browser browser = new Browser();
Window window = browser.newWindow("MyBrowserWindow");
Document document = window.newDocument("http://www.intags.de/abspieler/demo/demo2.html");
// Describe what a page with stock quotes is expected to look like.
Expectation expectation_quotes = new Expectation("QuotesExpectation");
// Make sure that there was no error while retrieving this page.
expectation_quotes.addPossibleStatus(200);
// Make sure that this page has the correct content type. You can check for any header you like.
expectation_quotes.addNeededHeader("Content-type", "text/html");
// Only pages with the word "quote" in the title are relevant.
// The first argument is the name that you will need later for getting the matched data.
// The second argument has to be a regular expression including all HTML tags.
expectation_quotes.addNeededContent("title", "<title>.*quote.*</title>");
If you don't know how to use regular expressions, read the SUN tutorial.
// The "quotes" pattern describes one table row with three cells; the text of each
// cell is enclosed in its own capture group.
expectation_quotes.addNeededContent("quotes",
    "<tr>[^<]*<td>([^<]*)</td>[^<]*<td>([^<]*)</td>[^<]*<td>([^<]*)</td>[^<]*</tr>");
document.addExpectation(expectation_quotes);
document.retrieve();
CategorizedResponse catResponse = document.getCategorizedResponse();
// Only evaluate the matches if the response was categorized under the quotes expectation.
if (catResponse.getExpectation() != null &&
        catResponse.getExpectation().getExpectationName().equals("QuotesExpectation")) {
    // One iteration per match of the "quotes" pattern.
    for (int i = 0; i < catResponse.getMatchCount("quotes"); i++) {
        System.out.println("FOUND:");
        // Second argument: 0 is printed as the complete string, 1 to 3 as the three cells.
        System.out.println("\tComplete String: " + catResponse.getMatch("quotes", 0, i));
        System.out.println("\tISIN:" + catResponse.getMatch("quotes", 1, i));
        System.out.println("\tName:" + catResponse.getMatch("quotes", 2, i));
        System.out.println("\tPrice:" + catResponse.getMatch("quotes", 3, i));
        System.out.println();
    }
}
package YourOwnPackage;
import de.intags.abspieler.*;
/**
 * Abspieler example that works with expectations: it creates a document for the
 * demo2 page, attaches an expectation describing a stock-quote page, retrieves the
 * document and, if the categorized response carries that expectation, prints
 * every match of the "quotes" pattern.
 */
public class Demo2 {

    /** Address of the demo page requested by this example. */
    private static final String PAGE_URL = "http://www.intags.de/abspieler/demo/demo2.html";

    /** Name under which the expectation is registered and later recognized. */
    private static final String EXPECTATION_NAME = "QuotesExpectation";

    /** Name of the needed content whose matches are printed. */
    private static final String QUOTES_KEY = "quotes";

    /** One table row with three cells; each cell text is a capture group. */
    private static final String ROW_PATTERN =
        "<tr>[^<]*<td>([^<]*)</td>[^<]*<td>([^<]*)</td>[^<]*<td>([^<]*)</td>[^<]*</tr>";

    /**
     * Runs the example.
     *
     * @param args command-line arguments (not used)
     * @throws AbspielerException if one of the Abspieler calls fails
     */
    public static void main(String[] args) throws AbspielerException {
        Window mainWindow = new Browser().newWindow("MyBrowserWindow");
        Document page = mainWindow.newDocument(PAGE_URL);

        page.addExpectation(buildQuotesExpectation());
        page.retrieve();

        CategorizedResponse categorized = page.getCategorizedResponse();
        // Nothing to print unless the response carries the quotes expectation.
        if (categorized.getExpectation() == null) {
            return;
        }
        if (!categorized.getExpectation().getExpectationName().equals(EXPECTATION_NAME)) {
            return;
        }

        int index = 0;
        while (index < categorized.getMatchCount(QUOTES_KEY)) {
            printQuote(categorized, index);
            index++;
        }
    }

    /** Builds the expectation that describes a page containing stock quotes. */
    private static Expectation buildQuotesExpectation() throws AbspielerException {
        Expectation quotes = new Expectation(EXPECTATION_NAME);
        // Status 200: there was no error while retrieving the page.
        quotes.addPossibleStatus(200);
        // The page must have the correct content type; any header can be checked this way.
        quotes.addNeededHeader("Content-type", "text/html");
        // Only pages with the word "quote" in the title are relevant. The first argument
        // is the name used later to get the data; the second is a regular expression
        // that includes all HTML tags.
        quotes.addNeededContent("title", "<title>.*quote.*</title>");
        quotes.addNeededContent(QUOTES_KEY, ROW_PATTERN);
        return quotes;
    }

    /** Prints the complete string and the three cells of the match at the given index. */
    private static void printQuote(CategorizedResponse categorized, int index)
            throws AbspielerException {
        System.out.println("FOUND:");
        System.out.println("\tComplete String: " + categorized.getMatch(QUOTES_KEY, 0, index));
        System.out.println("\tISIN:" + categorized.getMatch(QUOTES_KEY, 1, index));
        System.out.println("\tName:" + categorized.getMatch(QUOTES_KEY, 2, index));
        System.out.println("\tPrice:" + categorized.getMatch(QUOTES_KEY, 3, index));
        System.out.println();
        System.out.println();
    }
}