fix ingestion script not marking `done` when no results are returned

main
Avraham Sakal 1 year ago
parent a0970a7fca
commit 5614586b66

@ -27,6 +27,8 @@ const optionContractToTicker = ({
type PolygonResponse = { type PolygonResponse = {
next_url?: string; next_url?: string;
status: string;
resultsCount: number;
results: Array<{ results: Array<{
c: number; c: number;
h: number; h: number;
@ -80,10 +82,11 @@ async function getOptionAggregates(
).json()) as PolygonResponse, ).json()) as PolygonResponse,
{ retries: 5, factor: 2, minTimeout: 1000, maxTimeout: 60 * 1000 } { retries: 5, factor: 2, minTimeout: 1000, maxTimeout: 60 * 1000 }
); );
if (!latestBatchResponse.results) { if (latestBatchResponse.status.toLowerCase() !== "ok") {
console.log(latestBatchResponse); console.log(latestBatchResponse);
return; return;
} }
if (latestBatchResponse.resultsCount > 0) {
let latestBatch = latestBatchResponse.results.map((result) => ({ let latestBatch = latestBatchResponse.results.map((result) => ({
symbol: underlyingSymbol, symbol: underlyingSymbol,
expirationDate, expirationDate,
@ -105,6 +108,7 @@ async function getOptionAggregates(
}), }),
{ retries: 5, factor: 2, minTimeout: 1000, maxTimeout: 60 * 1000 } { retries: 5, factor: 2, minTimeout: 1000, maxTimeout: 60 * 1000 }
); );
}
await pRetry( await pRetry(
() => () =>
clickhouse.insert({ clickhouse.insert({
@ -137,19 +141,19 @@ async function getNextBatchOfUnstartedOptionAggregates(
limit: number limit: number
): Promise<Array<OptionContractDay>> { ): Promise<Array<OptionContractDay>> {
if (typeof previousUnstartedOptionContract === "undefined") { if (typeof previousUnstartedOptionContract === "undefined") {
return; return [];
} }
const optionContractsWithoutAggregates = await pRetry( const queryContents = `
() =>
query<OptionContractDay>(`
SELECT SELECT
asOfDate, asOfDate,
symbol, symbol,
expirationDate, expirationDate,
strike, strike,
type type,
argMax(status, ts) as status
FROM amg_option_aggregate_sync_statuses FROM amg_option_aggregate_sync_statuses
WHERE ( WHERE symbol IN ['AAPL','AMD','GOOGL','MSFT','NFLX']
AND (
( (
asOfDate = '${previousUnstartedOptionContract.asOfDate}' asOfDate = '${previousUnstartedOptionContract.asOfDate}'
AND symbol = '${previousUnstartedOptionContract.symbol}' AND symbol = '${previousUnstartedOptionContract.symbol}'
@ -177,13 +181,18 @@ async function getNextBatchOfUnstartedOptionAggregates(
asOfDate > '${previousUnstartedOptionContract.asOfDate}' asOfDate > '${previousUnstartedOptionContract.asOfDate}'
) )
) )
AND status = 'not-started' GROUP BY asOfDate, symbol, expirationDate, strike, type
HAVING status = 'not-started'
ORDER BY asOfDate, symbol, expirationDate, strike, type ORDER BY asOfDate, symbol, expirationDate, strike, type
LIMIT ${limit} LIMIT ${limit}
`), `;
//console.log(queryContents);
const optionContractsWithoutAggregates = await pRetry(
() => query<OptionContractDay>(queryContents),
{ retries: 5, factor: 2, minTimeout: 1000, maxTimeout: 60 * 1000 } { retries: 5, factor: 2, minTimeout: 1000, maxTimeout: 60 * 1000 }
); );
return optionContractsWithoutAggregates; console.log(`Got ${optionContractsWithoutAggregates.length} records`);
return optionContractsWithoutAggregates || [];
} }
/** /**
@ -194,7 +203,10 @@ async function getNextBatchOfUnstartedOptionAggregates(
* so as to start afresh. * so as to start afresh.
*/ */
async function revertPendingSyncs() { async function revertPendingSyncs() {
const pendingOptionContracts = await query<{ const batchSize = 1000;
let pendingOptionContracts;
do {
pendingOptionContracts = await query<{
asOfDate: string; asOfDate: string;
symbol: string; symbol: string;
expirationDate: string; expirationDate: string;
@ -207,10 +219,13 @@ async function revertPendingSyncs() {
symbol, symbol,
expirationDate, expirationDate,
strike, strike,
type type,
argMax(status, ts) as status
FROM amg_option_aggregate_sync_statuses FROM amg_option_aggregate_sync_statuses
WHERE status = 'pending' WHERE symbol IN ['AAPL','AMD','GOOGL','MSFT','NFLX']
ORDER BY asOfDate, symbol, expirationDate, strike, type GROUP BY asOfDate, symbol, expirationDate, strike, type
HAVING status = 'pending'
LIMIT ${batchSize}
`); `);
console.log( console.log(
"Pending operations:", "Pending operations:",
@ -219,66 +234,66 @@ async function revertPendingSyncs() {
`${symbol} ${expirationDate} ${strike} ${type} @ ${asOfDate}` `${symbol} ${expirationDate} ${strike} ${type} @ ${asOfDate}`
) )
); );
await pAll( await pSeries([
pendingOptionContracts.map(
({ asOfDate, symbol, expirationDate, strike, type }) =>
() =>
pSeries([
// Delete the `option_aggregates` rows first, in case this operation fails and we need to restart; that way the "pending" rows in `amg_option_aggregate_sync_statuses` are still there for the restart // Delete the `option_aggregates` rows first, in case this operation fails and we need to restart; that way the "pending" rows in `amg_option_aggregate_sync_statuses` are still there for the restart
() => () =>
clickhouse clickhouse
.command({ .command({
query: ` query: `
DELETE FROM option_aggregates DELETE FROM option_aggregates
WHERE symbol = '${symbol}' WHERE (symbol, expirationDate, strike, type, toDate(tsStart))
AND expirationDate = '${expirationDate}' IN [${pendingOptionContracts
AND strike = ${strike} .map(
AND type = '${type}' ({ asOfDate, symbol, expirationDate, strike, type }) =>
AND toDate(tsStart) = '${asOfDate}' `('${symbol}', '${expirationDate}', ${strike}, '${type}', '${asOfDate}')`
)
.join(",")}
]
`, `,
}) })
.then(() => { .then(() => {
console.log(`Deleted aggregates for `); console.log(`Deleted ${pendingOptionContracts.length} aggregates`);
}), }),
() => () =>
clickhouse clickhouse
.insert({ .insert({
table: "amg_option_aggregate_sync_statuses", table: "amg_option_aggregate_sync_statuses",
values: [ values: pendingOptionContracts.map(
{ ({ asOfDate, symbol, expirationDate, strike, type }) => ({
asOfDate, asOfDate,
symbol, symbol,
expirationDate, expirationDate,
strike, strike,
type, type,
status: "not-started", status: "not-started",
},
],
format: "JSONEachRow",
}) })
.then(() => {
console.log();
}),
])
), ),
{ concurrency: 1 } format: "JSONEachRow",
); })
.then(() => {}),
]);
} while (pendingOptionContracts.length === batchSize);
await clickhouse.command({
query: `
OPTIMIZE TABLE amg_option_aggregate_sync_statuses FINAL
`,
});
} }
// First, revert 'pending' syncs: // First, revert 'pending' syncs:
//await revertPendingSyncs(); await revertPendingSyncs();
/** Second, for each option contract, get all of its quotes. /** Second, for each option contract, get all of its quotes.
* *
* This queries Polygon with a concurrency of 6. * This queries Polygon with a concurrency of 16.
*/ */
const q = new pQueue({ concurrency: 6 }); const q = new pQueue({ concurrency: 16 });
/** Initialized with the lowest possible option contract. /** Initialized with the lowest possible option contract.
* It's passed into `getNextBatchOfUnstartedOptionAggregates()`. * It's passed into `getNextBatchOfUnstartedOptionAggregates()`.
*/ */
let nextBatchOfUnstartedOptionContracts: Array<OptionContractDay> = [ let nextBatchOfUnstartedOptionContracts: Array<OptionContractDay> = [
{ {
asOfDate: "2022-04-05", asOfDate: "2022-03-27",
symbol: "A", symbol: "A",
expirationDate: "2022-02-01", expirationDate: "2022-02-01",
strike: 0, strike: 0,
@ -289,8 +304,8 @@ while (
(nextBatchOfUnstartedOptionContracts = (nextBatchOfUnstartedOptionContracts =
await getNextBatchOfUnstartedOptionAggregates( await getNextBatchOfUnstartedOptionAggregates(
nextBatchOfUnstartedOptionContracts.pop(), nextBatchOfUnstartedOptionContracts.pop(),
200 100
)) !== null )).length !== 0
) { ) {
await pAll( await pAll(
nextBatchOfUnstartedOptionContracts.map( nextBatchOfUnstartedOptionContracts.map(

@ -77,7 +77,7 @@ CREATE TABLE amg_option_aggregate_sync_statuses (
ts DateTime64 DEFAULT now() ts DateTime64 DEFAULT now()
) )
ENGINE=AggregatingMergeTree ENGINE=AggregatingMergeTree
ORDER BY (asOfDate, symbol, expirationDate, strike, type, ts); ORDER BY (asOfDate, symbol, expirationDate, strike, type);
INSERT INTO amg_option_aggregate_sync_statuses INSERT INTO amg_option_aggregate_sync_statuses
SELECT asOfDate, symbol, expirationDate, strike, type, status, ts SELECT asOfDate, symbol, expirationDate, strike, type, status, ts

Loading…
Cancel
Save