organize ingestion scripts

master
Avraham Sakal 3 months ago
parent 61eeaf125e
commit 2863a7903f

@ -0,0 +1,99 @@
import { restClient } from "@polygon.io/client-js";
import { client } from "./clickhouse.js";
import PQueue from "p-queue";
import dotenv from "dotenv";
import { Env } from "@humanwhocodes/env";
dotenv.config();
const env = new Env();
const POLYGON_API_KEY = env.require("POLYGON_API_KEY");
const CONCURRENCY = Number.parseInt(env.get("CONCURRENCY", "6"));
const STOCK_QUOTES_TABLE = env.get("STOCK_QUOTES_TABLE", "stock_aggregates");
const START_DATE = env.get("START_DATE", "2022-08-01");
const END_DATE = env.get("END_DATE", "2022-12-31");
const polygon = restClient(POLYGON_API_KEY, "https://api.polygon.io", {
pagination: true,
});
const queue = new PQueue({ concurrency: CONCURRENCY });
function* dateRange(
startDateStr: string,
endDateStr: string
): Generator<string, void, unknown> {
// Convert the start and end date strings to Date objects
const startDate = new Date(startDateStr);
const endDate = new Date(endDateStr);
// Loop from the start date to the end date
for (
let currentDate = startDate;
currentDate <= endDate;
currentDate.setDate(currentDate.getDate() + 1)
) {
// Format the current date as YYYY-MM-DD
const formattedDate = currentDate.toISOString().split("T")[0];
// Yield the formatted date
yield formattedDate;
}
}
for (const date of dateRange(START_DATE, END_DATE)) {
/** To check if the market is open on the given date, we just fetch a few (10,
* for now) rows from the option_quotes table. If there are not 10 rows, then
* the market is closed. */
const isMarketOpen =
(
await (
await client.query({
query: `
SELECT *
FROM option_quotes
WHERE ts >= '${date} 00:00:00'
AND ts <= '${date} 23:59:59'
LIMIT 10
`,
format: "JSONEachRow",
})
).json()
).length === 10;
if (isMarketOpen) {
await client.command({
query: `
INSERT INTO option_quotes_filled
SELECT
option_type,
symbol,
expiration_date,
strike,
ts,
bid,
ask
FROM option_quotes
WHERE ts >= '${date} 00:00:00'
AND ts <= '${date} 23:59:59'
ORDER BY option_type, symbol, expiration_date, strike, ts
WITH FILL FROM toDateTime('${date} 09:30:00', 'America/New_York') TO toDateTime('${date} 16:15:01', 'America/New_York') INTERPOLATE (bid, ask)
`,
});
}
console.log("finished", date);
}
// const data = await polygon.options.quotes(
// "O:SPY241220P00600000",
// {
// "timestamp.gte": "2024-12-15",
// "timestamp.lte": "2024-12-16",
// sort: "timestamp",
// order: "asc",
// limit: 50000,
// },
// { pagination: true }
// );
// console.log(data.status);
// console.log(data.results?.length);

@ -0,0 +1,107 @@
import { restClient } from "@polygon.io/client-js";
import { client } from "./clickhouse.js";
import PQueue from "p-queue";
import dotenv from "dotenv";
import { Env } from "@humanwhocodes/env";
dotenv.config();
const env = new Env();
const POLYGON_API_KEY = env.require("POLYGON_API_KEY");
const CONCURRENCY = Number.parseInt(env.get("CONCURRENCY", "6"));
const STOCK_QUOTES_TABLE = env.get("STOCK_QUOTES_TABLE", "stock_aggregates");
const START_DATE = env.get("START_DATE", "2022-03-07");
const END_DATE = env.get("END_DATE", "2022-12-31");
const polygon = restClient(POLYGON_API_KEY, "https://api.polygon.io", {
pagination: true,
});
const queue = new PQueue({ concurrency: CONCURRENCY });
function* dateRange(
startDateStr: string,
endDateStr: string
): Generator<string, void, unknown> {
// Convert the start and end date strings to Date objects
const startDate = new Date(startDateStr);
const endDate = new Date(endDateStr);
// Loop from the start date to the end date
for (
let currentDate = startDate;
currentDate <= endDate;
currentDate.setDate(currentDate.getDate() + 1)
) {
// Format the current date as YYYY-MM-DD
const formattedDate = currentDate.toISOString().split("T")[0];
// Yield the formatted date
yield formattedDate;
}
}
for (const date of dateRange(START_DATE, END_DATE)) {
for (let hour = 13; hour <= 21; hour++) {
await client.command({
query: `
INSERT INTO calendar_stats
SELECT
date,
dte,
span,
moniness,
calendar_midpoint as price,
count(*) as number_of_quotes
FROM (
SELECT
toDate('${date}') as date,
date_diff('days', toDate(front.ts), front.expiration_date) as dte,
date_diff('days', front.expiration_date, back.expiration_date) as span,
floor((stock.price - strike) / stock.price, 2) as moniness,
back.bid - front.bid as calendar_bid,
back.ask - front.ask as calendar_ask,
floor((calendar_bid + calendar_ask) / 2, 2) as calendar_midpoint
FROM option_quotes_filled AS front
INNER JOIN option_quotes_filled AS back
ON front.option_type = back.option_type
AND front.symbol = back.symbol
AND front.strike = back.strike
AND front.expiration_date < back.expiration_date
AND front.ts = back.ts
INNER JOIN stock_aggregates_filled AS stock
ON front.symbol = stock.symbol
AND front.ts = stock.ts
WHERE front.ts >= '${date} ${hour}:00:00'
AND front.ts < '${date} ${hour + 1}:00:00'
)
GROUP BY date, dte, span, moniness, price
ORDER BY date, dte, span, moniness, price
SETTINGS allow_experimental_join_condition = 1
;
`,
// clickhouse_settings: {
// http_receive_timeout: 9999,
// receive_timeout: 9999,
// stream_poll_timeout_ms: 9999000,
// receive_data_timeout_ms: 9999000,
// },
});
console.log("finished", date, hour);
}
}
// const data = await polygon.options.quotes(
// "O:SPY241220P00600000",
// {
// "timestamp.gte": "2024-12-15",
// "timestamp.lte": "2024-12-16",
// sort: "timestamp",
// order: "asc",
// limit: 50000,
// },
// { pagination: true }
// );
// console.log(data.status);
// console.log(data.results?.length);

@ -19,6 +19,7 @@ export const client = createClient({
output_format_json_quote_64bit_floats: 0,
date_time_input_format: "best_effort",
},
request_timeout: 3600000,
});
// client.insert({

Loading…
Cancel
Save