Modifier and Type | Class and Description |
---|---|
private static class |
MediawikiImporter.convertConsumer |
private static class |
MediawikiImporter.convertWriter |
static class |
MediawikiImporter.indexMaker |
private static class |
MediawikiImporter.indexProducer |
private static class |
MediawikiImporter.PositionAwareReader |
private static class |
MediawikiImporter.wikiConsumer |
class |
MediawikiImporter.wikiparserrecord |
private static class |
MediawikiImporter.wikiraw |
static class |
MediawikiImporter.wikisourcerecord |
Modifier and Type | Field and Description |
---|---|
private int |
approxdocs |
int |
count |
private long |
docsize |
private static int |
docspermbinxmlbz2 |
private java.lang.String |
hostport |
static Importer |
job |
private static java.lang.String |
pageend |
private static byte[] |
pageendb |
private static java.lang.String |
pagestart |
private static byte[] |
pagestartb |
java.io.File |
sourcefile |
private long |
start |
java.io.File |
targetdir |
private static java.lang.String |
textend |
private static java.lang.String |
textstart |
private java.lang.String |
urlStub |
Constructor and Description |
---|
MediawikiImporter(java.io.File sourcefile,
java.io.File targetdir) |
Modifier and Type | Method and Description |
---|---|
static void |
checkIndex(java.io.File mediawikixml) |
int |
count() |
static void |
createIndex(java.io.File dumpFile) |
static MediawikiImporter.wikisourcerecord |
find(java.lang.String title,
java.io.File f) |
static java.io.File |
idxFromMediawikiXML(java.io.File mediawikixml) |
static void |
main(java.lang.String[] s) |
MediawikiImporter.wikiparserrecord |
newRecord() |
MediawikiImporter.wikiparserrecord |
newRecord(java.lang.String hostport,
java.lang.String urlStub,
java.lang.String title,
java.lang.StringBuilder sb) |
static byte[] |
read(java.io.File f,
long start,
int len) |
long |
remainingTime()
returns the remaining time, in milliseconds, for the completion of all records
|
void |
run()
the run method from Runnable
|
long |
runningTime()
returns the time this import has already been running
|
java.lang.String |
source() |
int |
speed()
returns the number of articles per second
|
java.lang.String |
status() |
activeCount, checkAccess, clone, countStackFrames, currentThread, destroy, dumpStack, enumerate, getAllStackTraces, getContextClassLoader, getDefaultUncaughtExceptionHandler, getId, getName, getPriority, getStackTrace, getState, getThreadGroup, getUncaughtExceptionHandler, holdsLock, interrupt, interrupted, isAlive, isDaemon, isInterrupted, join, join, join, resume, setContextClassLoader, setDaemon, setDefaultUncaughtExceptionHandler, setName, setPriority, setUncaughtExceptionHandler, sleep, sleep, start, stop, stop, suspend, toString, yield
private static final java.lang.String textstart
private static final java.lang.String textend
private static final java.lang.String pagestart
private static final java.lang.String pageend
private static final byte[] pagestartb
private static final byte[] pageendb
private static final int docspermbinxmlbz2
public static Importer job
public java.io.File sourcefile
public java.io.File targetdir
public int count
private long start
private final long docsize
private final int approxdocs
private java.lang.String hostport
private java.lang.String urlStub
public MediawikiImporter(java.io.File sourcefile, java.io.File targetdir)
public int speed()
public long remainingTime()
remainingTime
in interface Importer
public long runningTime()
Importer
runningTime
in interface Importer
public void run()
Importer
public static void checkIndex(java.io.File mediawikixml)
public static java.io.File idxFromMediawikiXML(java.io.File mediawikixml)
public static void createIndex(java.io.File dumpFile) throws java.io.IOException
java.io.IOException
public MediawikiImporter.wikiparserrecord newRecord()
public MediawikiImporter.wikiparserrecord newRecord(java.lang.String hostport, java.lang.String urlStub, java.lang.String title, java.lang.StringBuilder sb)
public static byte[] read(java.io.File f, long start, int len)
public static MediawikiImporter.wikisourcerecord find(java.lang.String title, java.io.File f) throws java.io.IOException
java.io.IOException
public static void main(java.lang.String[] s)