import java.io.*;
import java.util.*;

/**
 * Java class stub that processes one portion of an input file as a task of a
 * Sun Grid Engine array job. For full documentation refer to:
 * http://www.stat.auckland.ac.nz/~kimihia/sun-grid#qsub-java-partial
 *
 * <p>A typical usage example:
 * <pre>
 *   qsub -t 1-50 Simulation.sh input.csv
 * </pre>
 *
 * <p>Simulation.sh contains:
 * <pre>
 *   #$ -S /bin/sh
 *   java Simulation $@ $SGE_TASK_ID $SGE_TASK_LAST
 * </pre>
 *
 * @author Lyndon Walker and Stephen Cope,
 *         Department of Statistics, The University of Auckland.
 *         Copyright (c) 2008
 */
public class Simulation {

    /** Instance created by {@link #main(String[])} to run the file processing. */
    static Simulation sim;

    /**
     * Counts the lines in a text file.
     *
     * <p>A final line that is not terminated by a newline is still counted, so
     * the result is the number of records regardless of how the file ends.
     * On an I/O error a message is printed to standard error and the JVM exits
     * with status 1.
     *
     * @param filename path of the file to inspect
     * @return number of lines in the file
     */
    public int getInputFileLines(String filename) {
        int fileLines = 0;
        try {
            BufferedReader reader = new BufferedReader(new FileReader(filename));
            try {
                while (reader.readLine() != null) {
                    fileLines++;
                }
            } finally {
                // always release the file handle, even if reading failed
                reader.close();
            }
        } catch (IOException e) {
            System.err.println("Problem reading file: "+filename);
            Runtime.getRuntime().exit(1);
        }
        return fileLines;
    }

    /**
     * Reads this task's share of a CSV file and tallies the first field of each
     * record.
     *
     * <p>The file's lines are split into {@code task_last} contiguous slices.
     * Task {@code t} handles the zero-based line indices
     * {@code [n*(t-1)/task_last, n*t/task_last)} where {@code n} is the line
     * count, so every line belongs to exactly one task. When {@code n} is a
     * multiple of {@code task_last} every slice has {@code n/task_last} lines.
     * Blank lines inside a slice are ignored. On an I/O error the stack trace
     * and a message are printed to standard error and the JVM exits with
     * status 1.
     *
     * @param filename   path of the CSV file
     * @param task_start one-based number of this task (SGE_TASK_ID)
     * @param task_last  total number of tasks (SGE_TASK_LAST)
     * @return two-element array: {@code [0]} is the number of records whose
     *         first field equals 1, {@code [1]} the number of all other records
     * @throws IllegalArgumentException if the task numbers do not satisfy
     *         {@code 1 <= task_start <= task_last}
     * @throws NumberFormatException if the first field of a non-blank record
     *         is not an integer
     */
    public int [] readfile(String filename, int task_start, int task_last) {
        if (task_last < 1 || task_start < 1 || task_start > task_last) {
            throw new IllegalArgumentException(
                "Invalid task numbers: "+task_start+" of "+task_last);
        }

        int [] counts = new int [2];

        // calculate which records to keep; long arithmetic avoids overflow of
        // (line count * task number) on very large files
        int record_count_all = getInputFileLines(filename);
        int record_first = (int) ((long) record_count_all * (task_start - 1) / task_last);
        int record_end = (int) ((long) record_count_all * task_start / task_last);

        try {
            BufferedReader bufRead = new BufferedReader(new FileReader(filename));
            try {
                for (int index = 0; index < record_end; index++) {
                    String line = bufRead.readLine();
                    if (line == null) {
                        break; // file is shorter than when it was counted
                    }
                    if (index < record_first) {
                        continue; // record belongs to an earlier task
                    }
                    if (line.trim().length() == 0) {
                        continue; // tolerate blank lines, e.g. at the end of the file
                    }

                    // only the first comma-separated field is examined
                    int comma = line.indexOf(',');
                    String first = (comma < 0) ? line : line.substring(0, comma);
                    if (Integer.parseInt(first.trim()) == 1) {
                        ++counts[0];
                    } else {
                        ++counts[1];
                    }
                }
            } finally {
                // always release the file handle, even if parsing failed
                bufRead.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("Problem reading file: "+filename);
            Runtime.getRuntime().exit(1);
        }

        return counts;
    } /* readfile */

    /**
     * Command-line entry point.
     *
     * <p>Usage: {@code Simulation filename [SGE_TASK_ID SGE_TASK_LAST]}. When at
     * least three arguments are given, the last two are parsed as the task
     * number and the task count; if they are not integers the whole file is
     * processed as task 1 of 1. Task numbers that parse but are out of range
     * cause an error message and exit status 1.
     *
     * @param args command-line arguments
     */
    public static void main(String [] args) {
        int sge_task_id = 1;
        int sge_task_last = 1;

        if ( args.length < 1 ) {
            System.err.println("usage: Simulation filename [SGE_TASK_ID SGE_TASK_LAST]");
            Runtime.getRuntime().exit(1);
        }

        if ( args.length >= 3 ) {
            try {
                sge_task_id = Integer.parseInt(args[args.length-2]);
                sge_task_last = Integer.parseInt(args[args.length-1]);
                System.err.println("SGE task numbers: "+sge_task_id+" of "+sge_task_last);
            } catch (NumberFormatException e) {
                System.err.println("Ignoring unparseable SGE task numbers: "
                    +args[args.length-2]+","+args[args.length-1]);
                sge_task_id = 1;
                sge_task_last = 1;
            }
        }

        // reject numeric but impossible task numbers instead of reading a wrong slice
        if ( sge_task_last < 1 || sge_task_id < 1 || sge_task_id > sge_task_last ) {
            System.err.println("Invalid SGE task numbers: "+sge_task_id+" of "+sge_task_last);
            Runtime.getRuntime().exit(1);
        }

        sim = new Simulation();
        int counts [] = sim.readfile(args[0], sge_task_id, sge_task_last);

        // simulation
        doMagicUpon(counts);
    } /* main */

    /**
     * Placeholder for the actual simulation.
     *
     * @param data the counts returned by {@link #readfile(String, int, int)}
     */
    public static void doMagicUpon(int [] data) {
        // TODO your simulation goes here
    }
} /* class*/