public final class TextParser
extends java.lang.Object
Modifier and Type | Field and Description |
---|---|
private static java.util.Map<java.lang.String,java.lang.Object> |
denyExtensionx |
private static java.util.Map<java.lang.String,java.lang.Object> |
denyMime |
private static java.util.Map<java.lang.String,java.lang.String> |
ext2mime |
private static java.util.concurrent.ConcurrentHashMap<java.lang.String,java.util.LinkedHashSet<Parser>> |
ext2parser |
private static Parser |
genericIdiom |
private static java.util.Map<java.lang.String,java.util.LinkedHashSet<Parser>> |
mime2parser |
private static java.lang.Object |
v |
Constructor and Description |
---|
TextParser() |
Modifier and Type | Method and Description |
---|---|
static java.lang.String |
getDenyExtension() |
static java.lang.String |
getDenyMime() |
static void |
grantExtension(java.lang.String ext,
boolean grant) |
static void |
grantMime(java.lang.String mime,
boolean grant) |
private static void |
initParser(Parser parser) |
static java.lang.String |
mimeOf(MultiProtocolURL url) |
static java.lang.String |
mimeOf(java.lang.String ext) |
private static java.lang.String |
normalizeMimeType(java.lang.String mimeType) |
static java.util.Set<Parser> |
parsers() |
private static java.util.Set<Parser> |
parsers(MultiProtocolURL url,
java.lang.String mimeType1)
Finds a parser for a given URL and MIME type.
Because MIME types returned by web servers are sometimes wrong, we also compute the MIME type again
from the extension that can be extracted from the URL path.
|
private static Document[] |
parseSource(DigestURL location,
java.lang.String mimeType,
Parser parser,
java.lang.String charset,
VocabularyScraper scraper,
int timezoneOffset,
java.io.InputStream sourceStream) |
private static Document[] |
parseSource(DigestURL location,
java.lang.String mimeType,
java.util.Set<Parser> parsers,
java.lang.String charset,
VocabularyScraper scraper,
int timezoneOffset,
int depth,
byte[] sourceArray) |
static Document[] |
parseSource(DigestURL location,
java.lang.String mimeType,
java.lang.String charset,
VocabularyScraper scraper,
int timezoneOffset,
int depth,
byte[] content) |
static Document[] |
parseSource(DigestURL location,
java.lang.String mimeType,
java.lang.String charset,
VocabularyScraper scraper,
int timezoneOffset,
int depth,
java.io.File sourceFile) |
static Document[] |
parseSource(DigestURL location,
java.lang.String mimeType,
java.lang.String charset,
VocabularyScraper scraper,
int timezoneOffset,
int depth,
long contentLength,
java.io.InputStream sourceStream) |
static void |
setDenyExtension(java.lang.String denyList) |
static void |
setDenyMime(java.lang.String denyList) |
static java.lang.String |
supports(MultiProtocolURL url,
java.lang.String mimeType)
checks if the parser supports the given content.
|
static java.lang.String |
supportsExtension(MultiProtocolURL url)
checks if the parser supports the given extension.
|
static java.lang.String |
supportsExtension(java.lang.String ext)
checks if the parser supports the given extension.
|
static java.lang.String |
supportsMime(java.lang.String mimeType)
checks if the parser supports the given mime type.
|
private static final java.lang.Object v
private static final Parser genericIdiom
private static final java.util.Map<java.lang.String,java.util.LinkedHashSet<Parser>> mime2parser
private static final java.util.concurrent.ConcurrentHashMap<java.lang.String,java.util.LinkedHashSet<Parser>> ext2parser
private static final java.util.Map<java.lang.String,java.lang.String> ext2mime
private static final java.util.Map<java.lang.String,java.lang.Object> denyMime
private static final java.util.Map<java.lang.String,java.lang.Object> denyExtensionx
public static java.util.Set<Parser> parsers()
private static void initParser(Parser parser)
public static Document[] parseSource(DigestURL location, java.lang.String mimeType, java.lang.String charset, VocabularyScraper scraper, int timezoneOffset, int depth, java.io.File sourceFile) throws java.lang.InterruptedException, Parser.Failure
java.lang.InterruptedException
Parser.Failure
public static Document[] parseSource(DigestURL location, java.lang.String mimeType, java.lang.String charset, VocabularyScraper scraper, int timezoneOffset, int depth, byte[] content) throws Parser.Failure
Parser.Failure
public static Document[] parseSource(DigestURL location, java.lang.String mimeType, java.lang.String charset, VocabularyScraper scraper, int timezoneOffset, int depth, long contentLength, java.io.InputStream sourceStream) throws Parser.Failure
Parser.Failure
private static Document[] parseSource(DigestURL location, java.lang.String mimeType, Parser parser, java.lang.String charset, VocabularyScraper scraper, int timezoneOffset, java.io.InputStream sourceStream) throws Parser.Failure
Parser.Failure
private static Document[] parseSource(DigestURL location, java.lang.String mimeType, java.util.Set<Parser> parsers, java.lang.String charset, VocabularyScraper scraper, int timezoneOffset, int depth, byte[] sourceArray) throws Parser.Failure
Parser.Failure
public static java.lang.String supports(MultiProtocolURL url, java.lang.String mimeType)
url -
mimeType -
private static java.util.Set<Parser> parsers(MultiProtocolURL url, java.lang.String mimeType1) throws Parser.Failure
url - the given url
mimeType - the given mime type
Parser.Failure
public static java.lang.String supportsMime(java.lang.String mimeType)
mimeType -
public static java.lang.String supportsExtension(java.lang.String ext)
extension -
public static java.lang.String supportsExtension(MultiProtocolURL url)
extension -
public static java.lang.String mimeOf(MultiProtocolURL url)
public static java.lang.String mimeOf(java.lang.String ext)
private static java.lang.String normalizeMimeType(java.lang.String mimeType)
public static void setDenyMime(java.lang.String denyList)
public static java.lang.String getDenyMime()
public static void grantMime(java.lang.String mime, boolean grant)
public static void setDenyExtension(java.lang.String denyList)
public static java.lang.String getDenyExtension()
public static void grantExtension(java.lang.String ext, boolean grant)