import java.io.*;
import java.net.*;

/**
 * Requests a document from a web server using a bare HTTP/1.0 GET (no
 * request headers) and echoes the response to standard output (console).
 * The output can be captured to a file by redirecting it. Use the -d (data)
 * option to print only the body, or -h to print only the header returned by
 * the web server. For the accepted command-line arguments see
 * {@link #main(String[])}.
 *
 * @author Bert Wachsmuth
 * @version Sep. 28, 2007
 */
public class WebGrab {
    public static final String VERSION = "1.0";
    public static final int DEFAULT_PORT = 80;
    public static final String DEFAULT_FILE = "/";

    private static final char OPTION_NONE = 'n';
    private static final char OPTION_DATA = 'd';
    private static final char OPTION_HEAD = 'h';

    /** Line terminator required by HTTP, independent of the platform line separator. */
    private static final String CRLF = "\r\n";

    /**
     * Program entry point. The first argument is the URL to request; the port
     * defaults to 80 when the URL names none and the file defaults to "/"
     * when the URL has no path. A query string, if present, is sent as part
     * of the request. The optional second argument selects what is printed.
     *
     * <p>With no arguments the usage text is printed and the JVM exits with
     * status -1. All failures are reported on standard error.
     *
     * @param args [full URL of request, optional either -d (data only) or
     *             -h (header only)]
     */
    public static void main(String[] args) {
        if (args.length < 2) {
            System.out.println("\nWebGrab " + VERSION + ", (c) 2007 Bert G. Wachsmuth\n");
            System.out.println("This program requests a URL from a web server and returns the");
            System.out.println("answer to the console.\n");
        }
        if (args.length < 1) {
            printUsage();
            System.exit(-1);
        }

        try {
            URL url = new URL(args[0]);
            if (!url.getProtocol().equalsIgnoreCase("http")) {
                throw new MalformedURLException("protocol must be http");
            }

            // Locals rather than static fields, so that repeated calls to
            // main never see an option left over from an earlier call.
            String host = url.getHost();
            int port = url.getPort();
            if (port < 0) {
                port = DEFAULT_PORT;
            }
            String file = requestTarget(url);

            char option = OPTION_NONE;
            if (args.length >= 2) {
                if (args[1].equalsIgnoreCase("-d")) {
                    option = OPTION_DATA;
                } else if (args[1].equalsIgnoreCase("-h")) {
                    option = OPTION_HEAD;
                } else {
                    throw new IllegalArgumentException("Error - invalid program option");
                }
            }

            grab(host, file, port, option);
        } catch (MalformedURLException murle) {
            System.err.println("\nURL error: invalid URL [" + murle.getMessage() + "]\n");
        } catch (UnknownHostException uhe) {
            System.err.println("\nHost Error: invalid host [" + uhe.getMessage() + "]\n");
        } catch (IOException ioe) {
            System.err.println("\nIO Error: [" + ioe.getMessage() + "]\n");
        } catch (Exception ex) {
            System.err.println("\nUnkown Error: [" + ex.getMessage() + "]\n");
        }
    }

    /** Prints the command-line usage text to standard output. */
    private static void printUsage() {
        System.out.println("Usage:");
        System.out.println("\tjava WebGrab URL [option]");
        System.out.println("\n\t\twhere\n");
        System.out.println("\t* URL = fully qualified URL (e.g. http://sciris.edu:80/index.html)");
        System.out.println("\t* option = -d (data only) or -h (header only)\n");
        System.out.println();
    }

    /** Builds the request target (path plus optional "?query") for the given URL. */
    private static String requestTarget(URL url) {
        String path = url.getPath();
        if ((path == null) || (path.length() == 0)) {
            path = DEFAULT_FILE;
        }
        String query = url.getQuery();
        return (query == null) ? path : path + "?" + query;
    }

    /**
     * Retrieves a document from a host and prints the response to standard
     * output. Can display the header only, the data only, or both. The blank
     * line that separates header from data is consumed and never printed.
     *
     * @param host   the host to connect to
     * @param file   the path (and optional query string) to request
     * @param port   the port to connect to
     * @param option 'd' for data only, 'h' for header only, anything else
     *               prints neither part except 'n', which prints both
     * @throws UnknownHostException if the host cannot be resolved
     * @throws IOException          if connecting, sending the request, or
     *                              reading the response fails
     */
    public static void grab(String host, String file, int port, char option)
            throws UnknownHostException, IOException {
        Socket sock = new Socket(host, port);
        try {
            PrintWriter out = new PrintWriter(
                    new OutputStreamWriter(sock.getOutputStream()));
            BufferedReader in = new BufferedReader(
                    new InputStreamReader(sock.getInputStream()));

            // sending request; explicit CRLF because println() would emit the
            // platform line separator, which is a bare LF on Unix systems
            out.print("GET " + file + " HTTP/1.0" + CRLF);
            out.print(CRLF);
            // PrintWriter swallows write failures; checkError() flushes and
            // reports them so a failed send is not mistaken for an empty reply
            if (out.checkError()) {
                throw new IOException("failed to send request to " + host + ":" + port);
            }

            // receiving header (ends at the first empty line)
            String line = in.readLine();
            while ((line != null) && (!line.equals(""))) {
                if ((option == OPTION_NONE) || (option == OPTION_HEAD)) {
                    System.out.println(line);
                }
                line = in.readLine();
            }

            // getting data (everything after the empty line, until end of stream)
            line = in.readLine();
            while (line != null) {
                if ((option == OPTION_NONE) || (option == OPTION_DATA)) {
                    System.out.println(line);
                }
                line = in.readLine();
            }
        } finally {
            // Closing the socket also closes both of its streams, and runs
            // even when the transfer fails part-way through.
            sock.close();
        }
    }
}